9 files changed, 245010 insertions, 0 deletions
diff --git a/library/stdarch/crates/stdarch-verify/Cargo.toml b/library/stdarch/crates/stdarch-verify/Cargo.toml
new file mode 100644
index 000000000..6362e3d57
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "stdarch-verify"
+version = "0.1.0"
+authors = ["Alex Crichton <alex@alexcrichton.com>"]
+edition = "2018"
+
+[dependencies]
+proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "1.0", features = ["full"] }
+
+[lib]
+proc-macro = true
+test = false
+
+[dev-dependencies]
+serde = { version = "1.0", features = ['derive'] }
+serde-xml-rs = "0.3"
+html5ever = "0.23.0"
diff --git a/library/stdarch/crates/stdarch-verify/arm-intrinsics.html b/library/stdarch/crates/stdarch-verify/arm-intrinsics.html
new file mode 100644
index 000000000..ac246c6ba
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/arm-intrinsics.html
@@ -0,0 +1,93399 @@
+
+
+<!DOCTYPE html>
+<html class="page no-js" lang="en">
+<head>
+    <script>
+        if ((window.location.href.indexOf('<') || window.location.href.indexOf('>')) > -1) {
+            window.location.href = window.location.href.replace(/<.*>/g, '');            
+        }
+    </script>
+
+    <title>Technologies | NEON Intrinsics Reference – Arm Developer</title>
+    
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta http-equiv="X-UA-Compatible" content="IE=Edge">
+<meta content="en" http-equiv="content-language">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta name="author" content="Arm Ltd.">
+<meta name="description" content="All the NEON intrinsics reference in an interactive page.">
+<meta name="keywords" content="">
+<meta content="Copyright &#169; 1995-2018 Arm Limited (or its affiliates). All rights reserved." name="copyright">
+<meta name="apple-mobile-web-app-capable" content="yes">
+<meta name="msapplication-config" content="https://developer.arm.com:443/shared/common/img/favicon/browserconfig.xml">
+<meta name="msapplication-TileColor" content="#2b5797">
+<meta name="msapplication-TileImage" content="https://developer.arm.com:443/shared/common/img/favicon/mstile-144x144.png">
+<meta name="theme-color" content="#ffffff">
+<meta name="server" content="ARMGPCD2" />
+
+<meta property="og:title" content="Technologies | NEON Intrinsics Reference – Arm Developer">
+<meta property="og:description" content="All the NEON intrinsics reference in an interactive page.">
+<meta property="og:image" content="https://developer.arm.com:443">
+<meta property="og:site_name" content="ARM Developer">
+<meta property="og:url" content="https://developer.arm.com/technologies/neon/intrinsics">
+<meta property="og:type" content="website">
+<meta property="og:locale" content="en">
+
+<meta property="article:author" content="Arm Ltd.">
+<meta property="article:publisher" content="Arm Ltd.">
+
+<meta name="twitter:card" content="summary">
+<meta name="twitter:site" content="ARM Developer">
+<meta name="twitter:title" content="Technologies | NEON Intrinsics Reference – Arm Developer">
+<meta name="twitter:description" content="All the NEON intrinsics reference in an interactive page.">
+<meta name="twitter:image" content="https://developer.arm.com:443">
+<meta name="twitter:url" content="https://developer.arm.com/technologies/neon/intrinsics">
+
+<meta itemprop="name" content="Technologies | NEON Intrinsics Reference – Arm Developer">
+<meta itemprop="description" content="All the NEON intrinsics reference in an interactive page.">
+<meta itemprop="image" content="https://developer.arm.com:443">
+
+
+    
+
+    <link rel="stylesheet" type="text/css" href="/shared/developer.arm.com/css/app.css?v=D41D8CD98F00B204E9800998ECF8427E" />
+
+    
+
+<link rel="apple-touch-icon" sizes="57x57" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="60x60" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="72x72" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="76x76" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="114x114" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="120x120" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="144x144" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="152x152" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="apple-touch-icon" sizes="180x180" href="https://developer.arm.com/shared/common/img/favicon/apple-touch-icon.png?v=2.29.0.0" />
+<link rel="icon" type="image/png" href="https://developer.arm.com/shared/common/img/favicon/favicon-32x32.png?v=2.29.0.0" sizes="32x32" />
+<link rel="icon" type="image/png" href="https://developer.arm.com/shared/common/img/favicon/favicon-48x48.png?v=2.29.0.0" sizes="48x48" />
+<link rel="icon" type="image/png" href="https://developer.arm.com/shared/common/img/favicon/android-chrome-192x192.png?v=2.29.0.0" sizes="192x192" />
+<link rel="icon" type="image/png" href="https://developer.arm.com/shared/common/img/favicon/android-chrome-256x256.png?v=2.29.0.0" sizes="256x256" />
+<link rel="icon" type="image/png" href="https://developer.arm.com/shared/common/img/favicon/favicon-16x16.png?v=2.29.0.0" sizes="16x16" />
+<link rel="shortcut icon" type="image/ico" href="https://developer.arm.com/shared/common/img/favicon/favicon.ico?v=2.29.0.0" />
+<link rel="manifest" href="https://developer.arm.com/shared/common/img/favicon/manifest.json?v=2.29.0.0" />
+
+    <link rel="search" type="application/opensearchdescription+xml" title="ARM Developer" href="/opensearch.xml"/>
+
+    
+    
+
+
+<!-- Google Tag Manager: pushes a 'gtm.start' event onto window.dataLayer, then inserts an async script element for gtm.js (id GTM-K25LQR) before the first script element on the page -->
+<script>
+(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
+new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
+j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
+'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
+    })(window, document, 'script', 'dataLayer', 'GTM-K25LQR');
+</script>
+<!-- End Google Tag Manager -->
+
+
+</head>
+<body id="">
+    
+    
+
+<noscript>
+    <iframe src="https://www.googletagmanager.com/ns.html?id=GTM-K25LQR" height="0" width="0" style="display:none;visibility:hidden"></iframe>
+</noscript>
+
+
+    
+    <div class="c-feedback-message-container u-no-print"><style>
+/* Docs top margin fix */
+#content.arm-docs .section-wrapper h1 {
+    padding-top: 0 !important;
+}
+#content.arm-docs .section-wrapper {
+    margin-top: 2em;
+}
+</style>
+<style>
+    .top-bar-section .has-dropdown ul.dropdown a.active {
+      box-shadow: none;
+    }
+</style>
+<div id="modal-welcome" class="reveal-modal" data-reveal>
+  <p class="lead">ARM’s developer website includes documentation, tutorials, support resources and more.</p>
+  <p>Over the next few months we will be adding more developer resources and documentation for all the products and technologies that ARM provides.</p>
+  <a class="close-reveal-modal button" style="position: relative; float: right; color: white; font-size: 1em;">Close</a>
+</div>
+
+<!-- Hot fix for accordion icons -->
+<style>
+.accordion .accordion-navigation > a .accordion-icon:before {
+    content: "\f196" !important;
+}
+.accordion .accordion-navigation.active > a .accordion-icon:before {
+    content: "\f147" !important;
+}
+.accordion .accordion-navigation > a .accordion-icon {
+    font-size: 1em !important;
+    vertical-align: 0 !important;
+}
+</style></div>
+
+    
+    
+
+    
+    
+
+<div class="c-skip-navigation u-no-print">
+    <a href="#content" aria-label="Click here to skip to Main Content">Skip to Main Content</a>
+    <a href="#footer" aria-label="Click here to skip to Footer Navigation">Skip to Footer Navigation</a>
+</div>
+
+<div class="c-unsupported-browser-message u-no-print text-center old-ie-version">
+    <p>
+        <strong>
+            Sorry, your browser is not supported. <a href="https://whatbrowser.org/" target="_blank"> We recommend upgrading your browser</a>.
+            We have done our best to make all the documentation and resources available on old versions of Internet Explorer, but vector image support and the layout may not be optimal. Technical documentation is available as a PDF Download.
+        </strong>
+    </p>
+</div>
+
+<noscript>
+    <div class="js-disabled-message u-no-print text-center">
+        <p>
+            <strong>JavaScript seems to be disabled in your browser.</strong><br />
+            You must have JavaScript enabled in your browser to utilize the functionality of this website.
+        </p>
+    </div>
+</noscript>
+
+
+<div class="c-notifications-wrapper">
+
+    
+    <style>
+        .c-notifications-wrapper {
+            background-color: #e5eceb;
+            position: relative;
+            z-index: 99;
+        }
+    </style>
+
+    
+    
+
+
+    
+    <script>
+        (function() {
+            function setHeight() {
+                var $notification = document.querySelector('.c-notification');
+                if (!$notification) return;
+                var computedStyles = getComputedStyle($notification);
+                var height = computedStyles.getPropertyValue('height');
+                var $parent = $notification.parentElement;
+                if ($parent) $parent.style.height = height;
+            }
+            function getCookie(cname) {
+                var name = cname + '=';
+                var decodedCookie = decodeURIComponent(document.cookie);
+                var ca = decodedCookie.split(';');
+                for(var i = 0; i <ca.length; i++) {
+                    var c = ca[i];
+                    while (c.charAt(0) == ' ') {
+                        c = c.substring(1);
+                    }
+                    if (c.indexOf(name) == 0) {
+                        return c.substring(name.length, c.length);
+                    }
+                }
+                return '';
+            }                   
+            var $closeNotification = document.querySelector('.c-notification__close');
+            var $notificationWrapper = document.querySelector('.c-notifications-wrapper');
+            if (!$notificationWrapper) return;
+            if (!$closeNotification) return;
+            $closeNotification.onclick = function() {    
+                $notificationWrapper.style.display = 'none';
+                var expiryDate = new Date();
+                expiryDate.setTime(expiryDate.getTime() + (3650 * 24 * 60 * 60 * 1000));
+                document.cookie ='acceptSesameCookie=true; expires=' + expiryDate + '; domain=.arm.com; path=/';
+            };
+            var acceptCookie = getCookie('acceptSesameCookie');
+            if (acceptCookie) {
+                $notificationWrapper.style.display = 'none';
+            }
+            window.addEventListener('resize', setHeight);
+            setHeight(); 
+        })();
+    </script>
+</div>
+
+
+<header class="c-header u-no-print" role="banner">
+    
+    <div class="arm-global-menu-wrapper">
+        <div class="arm-global-menu">
+
+            
+            <nav class="top-bar js-mobile-navigation" data-topbar="" role="navigation">
+
+                
+                <div class="global-menu">
+ 
+                    
+                    <ul class="title-area">
+
+                        
+                        <li class="navigation-dropdown">
+                            <span class="navigation-dropdown-label">
+                                <a href="/">
+                                    <span>
+                                        <img src="/shared/developer.arm.com/img/arm-developer.svg" alt="ARM Developer" />
+                                    </span>
+                                    <i class="fa fa-caret-down"></i>
+                                </a>
+                            </span>
+                            <ul class="navigation-dropdown-list">
+    <li class="navigation-dropdown-list-item">
+        <a href="/" title="Home"><span><i class="fa fa-home"></i> Home</span></a>
+    </li>
+    <li class="navigation-dropdown-list-divider"><span></span></li>    
+        <li class="navigation-dropdown-list-item">
+            <a href="https://developer.arm.com/embedded" title="Embedded Software Development"><span>Embedded Software Development</span></a>
+        </li>
+        <li class="navigation-dropdown-list-item">
+            <a href="https://developer.arm.com/open-source" title="Linux &amp; Open Source"><span>Linux &amp; Open Source</span></a>
+        </li>
+        <li class="navigation-dropdown-list-item">
+            <a href="https://www.arm.com/resources/education" title="Education"><span>Education</span></a>
+        </li>
+        <li class="navigation-dropdown-list-item">
+            <a href="https://www.arm.com/resources/research" title="Research"><span>Research</span></a>
+        </li>
+        <li class="navigation-dropdown-list-item">
+            <a href="https://developer.arm.com/graphics" title="Graphics &amp; Multimedia Development"><span>Graphics &amp; Multimedia Development</span></a>
+        </li>
+        <li class="navigation-dropdown-list-item">
+            <a href="https://developer.arm.com/soc" title="SoC Design"><span>SoC Design</span></a>
+        </li>
+        <li class="navigation-dropdown-list-item">
+            <a href="https://developer.arm.com/hpc" title="High Performance Computing"><span>High Performance Computing</span></a>
+        </li>
+</ul>
+
+                        </li>
+
+                        
+                        <li class="menu">
+    <ul>
+            <li><a class="underline" href="https://developer.arm.com/products" title="Products">Products</a></li>
+            <li><a class="underline" href="https://developer.arm.com/solutions" title="Solutions">Solutions</a></li>
+            <li><a class="underline" href="https://developer.arm.com/technologies" title="Technologies">Technologies</a></li>
+            <li><a class="underline" href="https://developer.arm.com/support" title="Support">Support</a></li>
+    </ul>
+</li>
+
+
+                        
+                        <li class="search js-search">
+                                <div id="global-search-box">
+                                    <!-- When customizing this component, ensure to use "Coveo.$" instead of the regular jQuery "$" to
+     avoid any conflicts with Sitecore's Page Editor/Experience Editor.  -->
+
+<div>
+    <link rel="stylesheet" href="/Coveo/css/CoveoFullSearchNewDesign.css" />
+    <link rel="stylesheet" href="/Coveo/css/CoveoComponent.css" />
+    <link href="/shared/developer.arm.com/css/search.css" rel="stylesheet" />
+
+    <script type="text/javascript" src="/Coveo/js/CoveoJsSearch.WithDependencies.min.js"></script>
+    <script type="text/javascript" src="/Coveo/js/CoveoForSitecorePolyfills.js"></script>
+    <script type="text/javascript" src="/Coveo/js/CoveoForSitecore.js"></script>
+
+</div>
+<script src="/Coveo/js/CoveoForSitecoreOmniboxResultListFixCursor.js"></script>
+
+
+
+
+    <div>
+        <script type="text/javascript" src="/Coveo/js/cultures/en.js"></script>
+        <script type="text/javascript">
+            Coveo.$(function () {
+              
+                var searchbox = Coveo.$('#globalsearchbox');
+                if (typeof (CoveoForSitecore) !== 'undefined') {
+                    CoveoForSitecore.componentsOptions = {"analyticsCustomMetadata" : {"sitename" : "website" , "siteName" : "website" , "pageFullPath" : "/sitecore/content/developer/technologies/neon/intrinsics"},"analyticsEndpointUri" : "/coveo/rest/v6/analytics" , "boostExpressions" : "" , "clientLanguageFieldName" : "@z95xlanguage" , "clientLanguageName" : "en" , "defaultSortType" : "" , "defaultSortField" : "" , "defaultSortCriteriaNoSpace" : "Relevancy" , "defaultSortCriteriaLowercase" : "relevancy" , "enableClientSideLogging" : false,"externalCollections" : [],"externalSources" : [],"filterResultsOnCurrentCulture" : true,"filterExpression" : "NOT @templateid==(\"adb6ca4f-03ef-4f47-b9ac-9ce2ba53ff97\",\"fe5dd826-48c6-436d-b87a-7c4210c7413b\") AND @haslayout == 1 AND @z95xpath == \"110d559fdea542ea9c1c8a5df7e70ef9\"" , "id" : "coveoa8e6c6d9" , "indexSourceName" : "Coveo_web_index - DEVELOPER" , "isEditingInPageEditor" : false,"isPreviewingInPageEditor" : false,"isPreviewingInPageEditorWithSimulatedDevice" : false,"latestVersionFieldName" : "@z95xlatestversion" , "pageFullPath" : "/sitecore/content/developer/technologies/neon/intrinsics" , "pageName" : "intrinsics" , "restEndpointUri" : "/coveo/rest" , "searchboxPlaceholderText" : "" , "sendToSitecoreAnalytics" : false,"sitecoreItemId" : "eae5ffe5-224d-49c2-b491-93cad803b595" , "sitecoreItemUri" : "sitecore://web/{EAE5FFE5-224D-49C2-B491-93CAD803B595}?lang=en\u0026ver=2" , "siteName" : "website" , "searchRedirectionItemName" : "search" , "searchRedirectionUrl" : "/search" , "keepOmniboxSuggestionsProvidersDefaultOrdering" : false};
+                    searchbox.coveoForSitecore('initSearchbox',
+                        CoveoForSitecore.componentsOptions);
+                } else {
+                    Coveo.SearchEndpoint.endpoints["default"] = new Coveo.SearchEndpoint({"restUri" : "/coveo/rest" , "queryStringArguments" : {"sitecoreItemUri" : "sitecore://web/{EAE5FFE5-224D-49C2-B491-93CAD803B595}?lang=en\u0026ver=2" , "siteName" : "website"}});
+                    searchbox.coveo('initSearchbox',
+                        '/search');
+                }
+
+                 Coveo.$('#globalsearchbox').on("afterInitialization", function() {
+                var queryBox = Coveo.$(this).find("input.CoveoQueryBox");
+                if (!queryBox) {
+                    return;
+                }
+                queryBox.attr("placeholder", '');
+                queryBox.attr("aria-label", 'Search');
+            });
+                
+            });
+        </script>
+
+        <div id="globalsearchbox"
+                          >
+            <div class="CoveoAnalytics"
+                 data-anonymous="True"
+                 data-endpoint="/coveo/rest/coveoanalytics"
+                 data-search-hub="search"
+                 data-send-to-cloud="True">
+            </div>
+            <div class="CoveoSearchbox"
+                 data-auto-focus="True"
+                 data-enable-lowercase-operators="False"
+                 data-enable-partial-match="False"
+                 data-partial-match-keywords="5"
+                 data-partial-match-threshold="50%"
+                 data-enable-question-marks="False"
+                 data-enable-wildcards="False"
+                 data-enable-omnibox="true"
+                 data-omnibox-timeout="500"
+                 data-enable-field-addon="False"
+                 data-enable-simple-field-addon="False"
+                 data-enable-top-query-addon="False"
+                 data-enable-reveal-query-suggest-addon="False"
+                 data-enable-query-extension-addon="False"
+                 ></div>
+
+
+            
+
+
+
+
+    <script type="text/javascript">
+        
+       
+    </script>
+    <span class="CoveoForSitecoreOmniboxResultList"
+          data-header-title='Suggested Results'
+          data-query-expression=''
+          data-number-of-results='10'
+          data-result-template-id='globalsearchresults'>
+    </span>
+    <script id="globalsearchresults" class="result-template" type="text/x-underscore-template">
+        <div>
+            <a href="{{=clickUri}}" class="coveo-title">{{=title?highlight(title, titleHighlights):''}}</a>
+        </div>
+    </script>
+
+
+        </div>
+    </div>
+
+                                </div>
+                        </li>
+                
+                        
+                        
+
+                        
+                        <li class="menu-icon c-mobile-toggle c-mobile-search-toggle js-mobile-toggle" data-toggle="search">
+                            <a href="#" tabindex="0" title="Search" aria-label="Search" aria-haspopup="true">
+                                <i class="fa fa-search"></i><span class="sr-only">Search</span>
+                            </a>
+                        </li>
+
+                        
+                        
+
+                        
+                        <li class="menu-icon c-mobile-toggle c-mobile-navigation-toggle js-mobile-toggle" data-toggle="navigation">
+                            <a href="#" tabindex="0" title="Mobile Navigation" aria-label="Mobile Navigation" aria-haspopup="true">
+                                <i class="fa fa-bars"></i><span class="sr-only">Mobile Navigation</span>
+                            </a>
+                        </li>
+                
+                        
+                        <li class="developer-user-menu">
+                            
+
+<div id="c-65110123-4209-4daa-a4a3-335c8f0caa10" class="c-user-menu" role="navigation" aria-label="User menu">
+    <ul class="c-user-menu__items c-navigation__items c-user-menu__root" role="menubar">
+    <li class="c-user-menu__item" aria-haspopup="true" role="menuitem">
+        <a class="c-user-menu__toggle" title="User Menu" tabindex="0" aria-expanded="false">
+            <i class="fa fa-user" aria-hidden="true"></i>
+        </a>
+        <ul class="c-user-menu__items c-navigation__items c-user-menu__section is-aligned-right" aria-hidden="true" role="menu" tabindex="-1">
+                    <li class="c-user-menu__item" aria-haspopup="false" role="menuitem">
+                        <a href="/login?returnUrl=/technologies/neon/intrinsics" title="Login" tabindex="0" aria-expanded="false">Login</a>
+                    </li>
+                    <li class="c-user-menu__item" aria-haspopup="false" role="menuitem">
+                        <a href="/register?returnUrl=/technologies/neon/intrinsics" title="Register" tabindex="0" aria-expanded="false">Register</a>
+                    </li>
+        </ul>
+    </li>
+</ul>
+
+</div>
+
+<link rel="stylesheet" href="https://developer.arm.com/shared/arm-account/css/modules/user-menu.css?v=2.29.0.0" />
+
+                        </li>
+                    </ul>
+                    
+
+                </div>
+                
+
+                
+                <section class="arm-mobile-navigation top-bar-section hide-for-large-up">
+                    <ul class="right">
+    <li class="has-dropdown">
+        <a href="#">Main Menu</a>
+        <ul class="dropdown">
+                <li><a href="/products">Products</a></li>
+                <li><a href="/solutions">Solutions</a></li>
+                <li><a href="/technologies">Technologies</a></li>
+                <li><a href="/support">Support</a></li>
+        </ul>
+    </li>
+    <ul class="left"><li><a class="" href="/technologies">Overview</a></li><li><a class="" href="/technologies/big-little">big.LITTLE</a></li><li><a class="active" href="/technologies/neon">NEON</a></li><li><a class="" href="/embedded/cmsis">CMSIS</a></li><li class="has-dropdown"><a class="" href="/technologies/dsp">DSP</a><ul class="dropdown"><li><a class="" href="/technologies/dsp">DSP Overview</a></li><li><a class="" href="/technologies/dsp/arm-dsp-ecosystem-partners">Arm DSP ecosystem partners</a></li><li><a class="" href="/technologies/dsp/dsp-for-cortex-r">DSP for Cortex-R</a></li><li><a class="" href="/technologies/dsp/dsp-for-cortex-m">DSP for Cortex-M</a></li><li><a class="" href="/technologies/neon">NEON for Cortex-A and Cortex-R52</a></li></ul></li><li class="has-dropdown"><a class="" href="/technologies/machine-learning-on-arm">Machine Learning on Arm</a><ul class="dropdown"><li class="has-dropdown"><a class="" href="/technologies/machine-learning-on-arm/developer-material">Developer material</a><ul class="dropdown"><li class="has-dropdown"><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides">How-to guides</a><ul class="dropdown"><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-onnx">Configuring the Arm NN SDK build environment for ONNX</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-tensorflow">Configuring the Arm NN SDK build environment for TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-tensorflow-lite">Configuring the Arm NN SDK build environment for TensorFlow Lite</a></li><li><a class="" 
href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-a-caffe-model-on-openmv-using-cmsis-nn">Deploying a Caffe Model on OpenMV using CMSIS-NN</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-cloud-based-ml-for-speech-transcription">Deploying cloud-based ML for speech transcription</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-caffe">Configuring the Arm NN SDK build environment for Caffe</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-a-tensorflow-mnist-model-on-arm-nn">Deploying a TensorFlow MNIST model on Arm NN</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-a-caffe-mnist-model-using-the-arm-nn-sdk">Deploying a Caffe MNIST model using the Arm NN SDK</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/profiling-alexnet-on-raspberry-pi-and-hikey-960-with-the-compute-library">Profiling AlexNet on Raspberry Pi and HiKey 960 with the Compute Library</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/optimizing-neural-networks-for-mobile-and-embedded-devices-with-tensorflow">Optimizing neural networks for mobile and embedded devices with TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/running-alexnet-on-raspberry-pi-with-compute-library">Running AlexNet on Raspberry Pi with Compute Library</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/improving-your-machine-learning-workflow-using-the-arm-nn-sdk">Improving your machine learning workflow using the Arm NN SDK</a></li><li><a class="" 
href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/quantizing-neural-networks-to-8-bit-using-tensorflow">Quantizing neural networks to 8-bit using TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/cross-compiling-arm-nn-for-the-raspberry-pi-and-tensorflow">Cross-compiling Arm NN for the Raspberry Pi and TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/teach-your-raspberry-pi-yeah-world">Teach your Raspberry Pi - Yeah world</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/teach-your-raspberry-pi-multi-gesture">Teach your Raspberry Pi - Multi-gesture</a></li></ul></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/webinars">Webinars</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/software-for-machine-learning-on-arm">Software for Machine Learning on Arm</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/research-papers">Research papers</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/white-papers">White papers</a></li></ul></li><li><a class="" href="/technologies/machine-learning-on-arm/ecosystem-partners">Ecosystem partners</a></li><li><a class="" href="https://community.arm.com/p/ml-blog">Blog</a></li></ul></li><li class="has-dropdown"><a class="" href="/technologies/security-on-arm">Security on Arm</a><ul class="dropdown"><li><a class="" href="/technologies/security-on-arm/arm-technologies">Arm technologies</a></li><li><a class="" href="/technologies/security-on-arm/how-do-i-implement">How do I implement</a></li><li><a class="" href="/technologies/security-on-arm/types-of-attack-and-counter-measures">Types of attack and counter-measures</a></li><li class="has-dropdown"><a class="" 
href="/technologies/security-on-arm/arm-security-developer-community">Arm Security Developer Community</a><ul class="dropdown"><li><a class="" href="/technologies/security-on-arm/arm-security-developer-community/arm-security-partners">Arm Security Partners</a></li></ul></li></ul></li><li><a class="" href="/technologies/uefi-drivers">UEFI Drivers</a></li><li><a class="" href="/technologies/dynamiq">DynamIQ</a></li><li class="has-dropdown"><a class="" href="/technologies/graphics-technologies">Graphics Technologies</a><ul class="dropdown"><li><a class="" href="/technologies/graphics-technologies/adaptive-scalable-texture-compression">Adaptive Scalable Texture Compression</a></li><li><a class="" href="/technologies/graphics-technologies/arm-frame-buffer-compression">Arm Frame Buffer Compression</a></li><li><a class="" href="/technologies/graphics-technologies/transaction-elimination">Transaction Elimination</a></li></ul></li><li class="has-dropdown"><a class="" href="/technologies/trustzone">TrustZone</a><ul class="dropdown"><li><a class="" href="/technologies/trustzone/webinar-how-to-implement-a-secure-iot-system-on-armv8-m">Webinar - How to implement a secure IoT system on Armv8-M</a></li></ul></li><li><a class="" href="/technologies/compute-library">Compute Library</a></li><li><a class="" href="/technologies/floating-point">Floating Point</a></li></ul>
+</ul>
+
+                </section>
+                
+
+            </nav>
+            
+
+            <script>
+                (function() {
+                    var $globalMenu = document.querySelector('.arm-global-menu');
+                    if (!$globalMenu) return;
+                    var computedStyles = getComputedStyle($globalMenu);
+                    var height = computedStyles.getPropertyValue('height');
+                    var $parent = $globalMenu.parentElement;
+                    if ($parent) $parent.style.height = height;
+                })();
+            </script>
+
+        </div>
+    </div>
+
+    
+<div class="main-header">
+    <div class="row">
+        <div class="large-12 columns">
+            <ul class="breadcrumbs">
+                <li>
+                    <a href="/" title="Home">Home</a>
+                </li>
+                <li>
+                    <a href="/technologies" title="Technologies">Technologies</a>
+                </li>
+                <li>
+                    <a href="/technologies/neon" title="NEON">NEON</a>
+                </li>
+                <li class="current">NEON Intrinsics Reference</li>
+</ul>
+
+
+<h1>NEON Intrinsics Reference</h1>        </div>
+    </div>
+</div>
+
+
+<div class="c-contextual-navigation-wrapper show-for-large-up">
+    <style>
+        .c-contextual-navigation-wrapper, /* wrapper and bar share the same minimum height */
+        .c-contextual-navigation {
+            min-height: 55px;
+            position: relative;
+        }
+        .c-contextual-navigation {
+            width: 100%;
+        }
+        .c-contextual-navigation.is-stuck { /* 'is-stuck' is added/removed by the script in this same wrapper */
+            position: fixed;
+            z-index: 999;
+        }
+    </style>
+    <div id="middle" class="c-contextual-navigation full-width-nav">
+        <div class="contain-to-grid">
+            <nav class="top-bar mid-navigation" data-topbar="" role="navigation">
+                <ul class="title-area">
+                    <li class="name"></li>
+                    <li class="toggle-topbar menu-icon"><a href="#"><span></span></a></li>
+                </ul>
+                <section class="top-bar-section mid-nav">
+<ul class="left"><li><a class="" href="/technologies">Overview</a></li><li><a class="" href="/technologies/big-little">big.LITTLE</a></li><li><a class="active" href="/technologies/neon">NEON</a></li><li><a class="" href="/embedded/cmsis">CMSIS</a></li><li class="has-dropdown"><a class="" href="/technologies/dsp">DSP</a><ul class="dropdown"><li><a class="" href="/technologies/dsp">DSP Overview</a></li><li><a class="" href="/technologies/dsp/arm-dsp-ecosystem-partners">Arm DSP ecosystem partners</a></li><li><a class="" href="/technologies/dsp/dsp-for-cortex-r">DSP for Cortex-R</a></li><li><a class="" href="/technologies/dsp/dsp-for-cortex-m">DSP for Cortex-M</a></li><li><a class="" href="/technologies/neon">NEON for Cortex-A and Cortex-R52</a></li></ul></li><li class="has-dropdown"><a class="" href="/technologies/machine-learning-on-arm">Machine Learning on Arm</a><ul class="dropdown"><li class="has-dropdown"><a class="" href="/technologies/machine-learning-on-arm/developer-material">Developer material</a><ul class="dropdown"><li class="has-dropdown"><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides">How-to guides</a><ul class="dropdown"><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-onnx">Configuring the Arm NN SDK build environment for ONNX</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-tensorflow">Configuring the Arm NN SDK build environment for TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-tensorflow-lite">Configuring the Arm NN SDK build environment for TensorFlow Lite</a></li><li><a class="" 
href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-a-caffe-model-on-openmv-using-cmsis-nn">Deploying a Caffe Model on OpenMV using CMSIS-NN</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-cloud-based-ml-for-speech-transcription">Deploying cloud-based ML for speech transcription</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/configuring-the-arm-nn-sdk-build-environment-for-caffe">Configuring the Arm NN SDK build environment for Caffe</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-a-tensorflow-mnist-model-on-arm-nn">Deploying a TensorFlow MNIST model on Arm NN</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/deploying-a-caffe-mnist-model-using-the-arm-nn-sdk">Deploying a Caffe MNIST model using the Arm NN SDK</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/profiling-alexnet-on-raspberry-pi-and-hikey-960-with-the-compute-library">Profiling AlexNet on Raspberry Pi and HiKey 960 with the Compute Library</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/optimizing-neural-networks-for-mobile-and-embedded-devices-with-tensorflow">Optimizing neural networks for mobile and embedded devices with TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/running-alexnet-on-raspberry-pi-with-compute-library">Running AlexNet on Raspberry Pi with Compute Library</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/improving-your-machine-learning-workflow-using-the-arm-nn-sdk">Improving your machine learning workflow using the Arm NN SDK</a></li><li><a class="" 
href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/quantizing-neural-networks-to-8-bit-using-tensorflow">Quantizing neural networks to 8-bit using TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/cross-compiling-arm-nn-for-the-raspberry-pi-and-tensorflow">Cross-compiling Arm NN for the Raspberry Pi and TensorFlow</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/teach-your-raspberry-pi-yeah-world">Teach your Raspberry Pi - Yeah world</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/how-to-guides/teach-your-raspberry-pi-multi-gesture">Teach your Raspberry Pi - Multi-gesture</a></li></ul></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/webinars">Webinars</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/software-for-machine-learning-on-arm">Software for Machine Learning on Arm</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/research-papers">Research papers</a></li><li><a class="" href="/technologies/machine-learning-on-arm/developer-material/white-papers">White papers</a></li></ul></li><li><a class="" href="/technologies/machine-learning-on-arm/ecosystem-partners">Ecosystem partners</a></li><li><a class="" href="https://community.arm.com/p/ml-blog">Blog</a></li></ul></li><li class="has-dropdown"><a class="" href="/technologies/security-on-arm">Security on Arm</a><ul class="dropdown"><li><a class="" href="/technologies/security-on-arm/arm-technologies">Arm technologies</a></li><li><a class="" href="/technologies/security-on-arm/how-do-i-implement">How do I implement</a></li><li><a class="" href="/technologies/security-on-arm/types-of-attack-and-counter-measures">Types of attack and counter-measures</a></li><li class="has-dropdown"><a class="" 
href="/technologies/security-on-arm/arm-security-developer-community">Arm Security Developer Community</a><ul class="dropdown"><li><a class="" href="/technologies/security-on-arm/arm-security-developer-community/arm-security-partners">Arm Security Partners</a></li></ul></li></ul></li><li><a class="" href="/technologies/uefi-drivers">UEFI Drivers</a></li><li><a class="" href="/technologies/dynamiq">DynamIQ</a></li><li class="has-dropdown"><a class="" href="/technologies/graphics-technologies">Graphics Technologies</a><ul class="dropdown"><li><a class="" href="/technologies/graphics-technologies/adaptive-scalable-texture-compression">Adaptive Scalable Texture Compression</a></li><li><a class="" href="/technologies/graphics-technologies/arm-frame-buffer-compression">Arm Frame Buffer Compression</a></li><li><a class="" href="/technologies/graphics-technologies/transaction-elimination">Transaction Elimination</a></li></ul></li><li class="has-dropdown"><a class="" href="/technologies/trustzone">TrustZone</a><ul class="dropdown"><li><a class="" href="/technologies/trustzone/webinar-how-to-implement-a-secure-iot-system-on-armv8-m">Webinar - How to implement a secure IoT system on Armv8-M</a></li></ul></li><li><a class="" href="/technologies/compute-library">Compute Library</a></li><li><a class="" href="/technologies/floating-point">Floating Point</a></li></ul>                </section>
+            </nav>
+        </div>
+    </div>
+    <script>
+        (function() {
+            var $contextualNavigation;
+            var $contextualNavigationWrapper;
+
+            function setHeight() {
+                var computedStyles = getComputedStyle($contextualNavigation);
+                var height = computedStyles.getPropertyValue('height');
+                $contextualNavigationWrapper.style.height = height;
+            };
+
+            function setPosition(evt) {
+                var $globalNavigationWrapper = document.querySelector('.arm-global-menu-wrapper');
+                var $notificationsWrapper = document.querySelector('.c-notifications-wrapper');
+
+                var pageOffset = window.pageYOffset;
+
+                var globalNavigationHeight = (!!$globalNavigationWrapper) ? $globalNavigationWrapper.clientHeight : 0;
+                var notificationsHeight = (!!$notificationsWrapper) ? $notificationsWrapper.clientHeight : 0;
+
+                var sum = globalNavigationHeight + notificationsHeight;
+                if (pageOffset >= sum) {
+                    $contextualNavigation.classList.add('is-stuck');
+                    $contextualNavigation.style.top = sum + 'px';
+                } else {
+                    $contextualNavigation.classList.remove('is-stuck');
+                    $contextualNavigation.style.top = 0;
+                }
+            };
+
+            function repaint(evt) {
+                $contextualNavigation = document.querySelector('.c-contextual-navigation');
+                $contextualNavigationWrapper = $contextualNavigation.parentElement;
+                setHeight();
+                setPosition();
+            };
+
+            window.addEventListener('scroll', repaint);
+            window.addEventListener('resize', repaint);
+
+            repaint();
+        })();
+    </script>
+</div>
+</header>
+
+
+<main class="c-component c-content" id="content" role="main">
+    <!-- START ProductItemContent -->
+<div>
+    
+
+
+<div id="c-d6ca4787-3a02-4c15-91dd-387f84915495" class="o-widget c-generic-content small-text-center large-text-left"  data-widget="generic-content-variation-1">
+    <article class="row">
+        <section class="columns">
+            <h2 class="c-panel__subtitle">NEON Intrinsics</h2>
+            <p>Click on the intrinsic name to display more information about the intrinsic. To search for an intrinsic, enter the name of the intrinsic in the search box. As you type, the matching intrinsics will be displayed.</p>
+        </section>
+    </article>
+</div>
+
+
+<div id="c-5e6d165c-46c9-4b6a-aae4-2aeb4639d070" class="c-widget c-html-snippet"  data-deprecated>
+    <div class="row">
+        <div class="columns">
+            <h2></h2>
+
+<style>
+.intrinsics-search input { /* search box takes 80% of the row; the button rule below takes the other 20% */
+    height: 2.4rem;
+	width: 80%;
+	display: inline;
+}
+
+.intrinsics-search button {
+    width: 20%;
+    height: 2.4rem;
+	display: inline;
+}
+
+.intrinsic-accordion label, /* header row of each entry: the label that holds the intrinsic's signature */
+.intrinsic-accordion input[type="checkbox"] + label
+{
+	font-family: Consolas, monospace;
+	padding: 0.25em 0.5em;
+	font-size: 1em;
+	position: relative;
+    display: block;
+    cursor: pointer;
+    background: #EFEFEF;
+    border: none;
+	margin: 0;
+    color: #565b5b;
+}
+
+.intrinsic-accordion label b { /* the intrinsic name inside the signature is wrapped in <b> */
+	color: #009fc1;
+}
+
+.intrinsic-accordion .right { /* short summary floated to the right of the header, e.g. "Add" */
+	float:right;
+	font-family: sans-serif;
+	font-size: 0.8em;
+}
+
+.intrinsic-accordion .intrinsic_name { /* NOTE(review): the entries seen here do not use this class - confirm it is still needed */
+	color: blue;
+}
+
+.intrinsic-accordion .intrinsic a { /* links inside an entry (instruction and pseudocode references) */
+	color: #009fc1;
+    text-decoration: underline;
+    text-decoration-style: solid;
+    font-weight: bold;
+}
+
+.intrinsic-accordion label:hover {
+    background: #F3F3F3;
+}
+
+.intrinsic-accordion input:checked + label, /* header of an expanded entry, hovered or not */
+.intrinsic-accordion input:checked + label:hover {
+    background: #CDECC5;
+}
+.intrinsic-accordion input { /* the checkbox itself is hidden; its <label for="..."> toggles it */
+    display: none;
+}
+.intrinsic-accordion article { /* entry details stay collapsed until the checkbox is checked */
+    background: rgb(255, 255, 255);
+    /*overflow: hidden;*/
+    display: none;
+    -webkit-transition: all 0.3s ease-in-out; /* NOTE(review): only 'display' differs between the two states and it does not interpolate, so these transitions presumably have no visible effect - confirm */
+    -moz-transition: all 0.3s ease-in-out;
+    -o-transition: all 0.3s ease-in-out;
+    -ms-transition: all 0.3s ease-in-out;
+    transition: all 0.3s ease-in-out;
+}
+
+.intrinsic-accordion input:checked ~ article { /* reveal the details of a checked (expanded) entry */
+    -webkit-transition: all 0.5s ease-in-out;
+    -moz-transition: all 0.5s ease-in-out;
+    -o-transition: all 0.5s ease-in-out;
+    -ms-transition: all 0.5s ease-in-out;
+    transition: all 0.5s ease-in-out;
+	display: block;
+}
+
+.intrinsic-accordion .intrinsic { /* thin frame around every entry */
+    border: 1px solid #ededed;
+}
+
+.intrinsic-accordion .intrinsic article {
+    margin: 1rem;
+}
+
+</style>
+
+<script>
+
+</script>
+<script>
+$(document).ready(function(){
+  $("#js-intrinsics-query").keyup(function(){
+   var bla = $('#js-intrinsics-query').val();
+   $( ".intrinsic" ).each(function(){
+        var htxt=$(this).text();
+        if (htxt.toLowerCase().indexOf(bla.toLowerCase()) > -1) {
+            $(this).show();
+        } else {
+            $(this).hide();
+        }
+    });
+  });
+});
+
+</script>
+</head>
+<body>
+<div class="row">
+  <div class="large-6 columns spacing-2 intrinsics-search">
+	<input id="js-intrinsics-query"><button class="tiny"><i class="fa fa-search"></i></button>
+  </div>
+</div>
+
+<section class="intrinsic-accordion">
+<div class="intrinsic"><input id="vadd_s8" type="checkbox"><label for="vadd_s8"><div>int8x8_t <b><b>vadd_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_s8" type="checkbox"><label for="vaddq_s8"><div>int8x16_t <b><b>vaddq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_s16" type="checkbox"><label for="vadd_s16"><div>int16x4_t <b><b>vadd_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_s16" type="checkbox"><label for="vaddq_s16"><div>int16x8_t <b><b>vaddq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_s32" type="checkbox"><label for="vadd_s32"><div>int32x2_t <b><b>vadd_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_s32" type="checkbox"><label for="vaddq_s32"><div>int32x4_t <b><b>vaddq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_s64" type="checkbox"><label for="vadd_s64"><div>int64x1_t <b><b>vadd_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_s64" type="checkbox"><label for="vaddq_s64"><div>int64x2_t <b><b>vaddq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_u8" type="checkbox"><label for="vadd_u8"><div>uint8x8_t <b><b>vadd_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_u8" type="checkbox"><label for="vaddq_u8"><div>uint8x16_t <b><b>vaddq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_u16" type="checkbox"><label for="vadd_u16"><div>uint16x4_t <b><b>vadd_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_u16" type="checkbox"><label for="vaddq_u16"><div>uint16x8_t <b><b>vaddq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_u32" type="checkbox"><label for="vadd_u32"><div>uint32x2_t <b><b>vadd_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_u32" type="checkbox"><label for="vaddq_u32"><div>uint32x4_t <b><b>vaddq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_u64" type="checkbox"><label for="vadd_u64"><div>uint64x1_t <b><b>vadd_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_u64" type="checkbox"><label for="vaddq_u64"><div>uint64x2_t <b><b>vaddq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_f32" type="checkbox"><label for="vadd_f32"><div>float32x2_t <b><b>vadd_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&amp;FP registers, writes the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fadd-vector-floating-point-add-vector">FADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_f32" type="checkbox"><label for="vaddq_f32"><div>float32x4_t <b><b>vaddq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&amp;FP registers, writes the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fadd-vector-floating-point-add-vector">FADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vadd_f64" type="checkbox"><label for="vadd_f64"><div>float64x1_t <b><b>vadd_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&amp;FP registers, writes the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fadd-vector-floating-point-add-vector">FADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddq_f64" type="checkbox"><label for="vaddq_f64"><div>float64x2_t <b><b>vaddq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&amp;FP registers, writes the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fadd-vector-floating-point-add-vector">FADD</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddd_s64" type="checkbox"><label for="vaddd_s64"><div>int64_t <b><b>vaddd_s64</b></b> (int64_t a, int64_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddd_u64" type="checkbox"><label for="vaddd_u64"><div>uint64_t <b><b>vaddd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add (vector). This instruction adds corresponding elements in the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/add-vector-add-vector">ADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_s8" type="checkbox"><label for="vaddl_s8"><div>int16x8_t <b><b>vaddl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.  The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddl-saddl2-signed-add-long-vector">SADDL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_s16" type="checkbox"><label for="vaddl_s16"><div>int32x4_t <b><b>vaddl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.  The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddl-saddl2-signed-add-long-vector">SADDL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_s32" type="checkbox"><label for="vaddl_s32"><div>int64x2_t <b><b>vaddl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.  The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddl-saddl2-signed-add-long-vector">SADDL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_u8" type="checkbox"><label for="vaddl_u8"><div>uint16x8_t <b><b>vaddl_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddl-uaddl2-unsigned-add-long-vector">UADDL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_u16" type="checkbox"><label for="vaddl_u16"><div>uint32x4_t <b><b>vaddl_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddl-uaddl2-unsigned-add-long-vector">UADDL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_u32" type="checkbox"><label for="vaddl_u32"><div>uint64x2_t <b><b>vaddl_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddl-uaddl2-unsigned-add-long-vector">UADDL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_high_s8" type="checkbox"><label for="vaddl_high_s8"><div>int16x8_t <b><b>vaddl_high_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.  The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddl-saddl2-signed-add-long-vector">SADDL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_high_s16" type="checkbox"><label for="vaddl_high_s16"><div>int32x4_t <b><b>vaddl_high_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.  The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddl-saddl2-signed-add-long-vector">SADDL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_high_s32" type="checkbox"><label for="vaddl_high_s32"><div>int64x2_t <b><b>vaddl_high_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.  The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddl-saddl2-signed-add-long-vector">SADDL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_high_u8" type="checkbox"><label for="vaddl_high_u8"><div>uint16x8_t <b><b>vaddl_high_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddl-uaddl2-unsigned-add-long-vector">UADDL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_high_u16" type="checkbox"><label for="vaddl_high_u16"><div>uint32x4_t <b><b>vaddl_high_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddl-uaddl2-unsigned-add-long-vector">UADDL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddl_high_u32" type="checkbox"><label for="vaddl_high_u32"><div>uint64x2_t <b><b>vaddl_high_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&amp;FP register to the corresponding vector element of the second source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddl-uaddl2-unsigned-add-long-vector">UADDL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_s8" type="checkbox"><label for="vaddw_s8"><div>int16x8_t <b><b>vaddw_s8</b></b> (int16x8_t a, int8x8_t b)<span class="right">Signed add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Wide. This instruction adds vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddw-saddw2-signed-add-wide">SADDW</a> Vd.8H,Vn.8H,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_s16" type="checkbox"><label for="vaddw_s16"><div>int32x4_t <b><b>vaddw_s16</b></b> (int32x4_t a, int16x4_t b)<span class="right">Signed add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Wide. This instruction adds vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddw-saddw2-signed-add-wide">SADDW</a> Vd.4S,Vn.4S,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_s32" type="checkbox"><label for="vaddw_s32"><div>int64x2_t <b><b>vaddw_s32</b></b> (int64x2_t a, int32x2_t b)<span class="right">Signed add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Wide. This instruction adds vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddw-saddw2-signed-add-wide">SADDW</a> Vd.2D,Vn.2D,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_u8" type="checkbox"><label for="vaddw_u8"><div>uint16x8_t <b><b>vaddw_u8</b></b> (uint16x8_t a, uint8x8_t b)<span class="right">Unsigned add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddw-uaddw2-unsigned-add-wide">UADDW</a> Vd.8H,Vn.8H,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_u16" type="checkbox"><label for="vaddw_u16"><div>uint32x4_t <b><b>vaddw_u16</b></b> (uint32x4_t a, uint16x4_t b)<span class="right">Unsigned add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddw-uaddw2-unsigned-add-wide">UADDW</a> Vd.4S,Vn.4S,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_u32" type="checkbox"><label for="vaddw_u32"><div>uint64x2_t <b><b>vaddw_u32</b></b> (uint64x2_t a, uint32x2_t b)<span class="right">Unsigned add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddw-uaddw2-unsigned-add-wide">UADDW</a> Vd.2D,Vn.2D,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_high_s8" type="checkbox"><label for="vaddw_high_s8"><div>int16x8_t <b><b>vaddw_high_s8</b></b> (int16x8_t a, int8x16_t b)<span class="right">Signed add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Wide. This instruction adds vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddw-saddw2-signed-add-wide">SADDW2</a> Vd.8H,Vn.8H,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_high_s16" type="checkbox"><label for="vaddw_high_s16"><div>int32x4_t <b><b>vaddw_high_s16</b></b> (int32x4_t a, int16x8_t b)<span class="right">Signed add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Wide. This instruction adds vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddw-saddw2-signed-add-wide">SADDW2</a> Vd.4S,Vn.4S,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_high_s32" type="checkbox"><label for="vaddw_high_s32"><div>int64x2_t <b><b>vaddw_high_s32</b></b> (int64x2_t a, int32x4_t b)<span class="right">Signed add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Wide. This instruction adds vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddw-saddw2-signed-add-wide">SADDW2</a> Vd.2D,Vn.2D,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_high_u8" type="checkbox"><label for="vaddw_high_u8"><div>uint16x8_t <b><b>vaddw_high_u8</b></b> (uint16x8_t a, uint8x16_t b)<span class="right">Unsigned add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddw-uaddw2-unsigned-add-wide">UADDW2</a> Vd.8H,Vn.8H,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_high_u16" type="checkbox"><label for="vaddw_high_u16"><div>uint32x4_t <b><b>vaddw_high_u16</b></b> (uint32x4_t a, uint16x8_t b)<span class="right">Unsigned add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddw-uaddw2-unsigned-add-wide">UADDW2</a> Vd.4S,Vn.4S,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddw_high_u32" type="checkbox"><label for="vaddw_high_u32"><div>uint64x2_t <b><b>vaddw_high_u32</b></b> (uint64x2_t a, uint32x4_t b)<span class="right">Unsigned add wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&amp;FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddw-uaddw2-unsigned-add-wide">UADDW2</a> Vd.2D,Vn.2D,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vhadd_s8" type="checkbox"><label for="vhadd_s8"><div>int8x8_t <b><b>vhadd_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shadd-signed-halving-add">SHADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhaddq_s8" type="checkbox"><label for="vhaddq_s8"><div>int8x16_t <b><b>vhaddq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shadd-signed-halving-add">SHADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhadd_s16" type="checkbox"><label for="vhadd_s16"><div>int16x4_t <b><b>vhadd_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shadd-signed-halving-add">SHADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhaddq_s16" type="checkbox"><label for="vhaddq_s16"><div>int16x8_t <b><b>vhaddq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shadd-signed-halving-add">SHADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhadd_s32" type="checkbox"><label for="vhadd_s32"><div>int32x2_t <b><b>vhadd_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shadd-signed-halving-add">SHADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhaddq_s32" type="checkbox"><label for="vhaddq_s32"><div>int32x4_t <b><b>vhaddq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shadd-signed-halving-add">SHADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhadd_u8" type="checkbox"><label for="vhadd_u8"><div>uint8x8_t <b><b>vhadd_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhadd-unsigned-halving-add">UHADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhaddq_u8" type="checkbox"><label for="vhaddq_u8"><div>uint8x16_t <b><b>vhaddq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhadd-unsigned-halving-add">UHADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhadd_u16" type="checkbox"><label for="vhadd_u16"><div>uint16x4_t <b><b>vhadd_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhadd-unsigned-halving-add">UHADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhaddq_u16" type="checkbox"><label for="vhaddq_u16"><div>uint16x8_t <b><b>vhaddq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhadd-unsigned-halving-add">UHADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhadd_u32" type="checkbox"><label for="vhadd_u32"><div>uint32x2_t <b><b>vhadd_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhadd-unsigned-halving-add">UHADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhaddq_u32" type="checkbox"><label for="vhaddq_u32"><div>uint32x4_t <b><b>vhaddq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhadd-unsigned-halving-add">UHADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhadd_s8" type="checkbox"><label for="vrhadd_s8"><div>int8x8_t <b><b>vrhadd_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srhadd-signed-rounding-halving-add">SRHADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhaddq_s8" type="checkbox"><label for="vrhaddq_s8"><div>int8x16_t <b><b>vrhaddq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srhadd-signed-rounding-halving-add">SRHADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhadd_s16" type="checkbox"><label for="vrhadd_s16"><div>int16x4_t <b><b>vrhadd_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srhadd-signed-rounding-halving-add">SRHADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhaddq_s16" type="checkbox"><label for="vrhaddq_s16"><div>int16x8_t <b><b>vrhaddq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srhadd-signed-rounding-halving-add">SRHADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhadd_s32" type="checkbox"><label for="vrhadd_s32"><div>int32x2_t <b><b>vrhadd_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srhadd-signed-rounding-halving-add">SRHADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhaddq_s32" type="checkbox"><label for="vrhaddq_s32"><div>int32x4_t <b><b>vrhaddq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srhadd-signed-rounding-halving-add">SRHADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhadd_u8" type="checkbox"><label for="vrhadd_u8"><div>uint8x8_t <b><b>vrhadd_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urhadd-unsigned-rounding-halving-add">URHADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhaddq_u8" type="checkbox"><label for="vrhaddq_u8"><div>uint8x16_t <b><b>vrhaddq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urhadd-unsigned-rounding-halving-add">URHADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhadd_u16" type="checkbox"><label for="vrhadd_u16"><div>uint16x4_t <b><b>vrhadd_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urhadd-unsigned-rounding-halving-add">URHADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhaddq_u16" type="checkbox"><label for="vrhaddq_u16"><div>uint16x8_t <b><b>vrhaddq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urhadd-unsigned-rounding-halving-add">URHADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhadd_u32" type="checkbox"><label for="vrhadd_u32"><div>uint32x2_t <b><b>vrhadd_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urhadd-unsigned-rounding-halving-add">URHADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrhaddq_u32" type="checkbox"><label for="vrhaddq_u32"><div>uint32x4_t <b><b>vrhaddq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned rounding halving add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&amp;FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urhadd-unsigned-rounding-halving-add">URHADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (element1+element2+1)&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_s8" type="checkbox"><label for="vqadd_s8"><div>int8x8_t <b><b>vqadd_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_s8" type="checkbox"><label for="vqaddq_s8"><div>int8x16_t <b><b>vqaddq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_s16" type="checkbox"><label for="vqadd_s16"><div>int16x4_t <b><b>vqadd_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_s16" type="checkbox"><label for="vqaddq_s16"><div>int16x8_t <b><b>vqaddq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_s32" type="checkbox"><label for="vqadd_s32"><div>int32x2_t <b><b>vqadd_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_s32" type="checkbox"><label for="vqaddq_s32"><div>int32x4_t <b><b>vqaddq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_s64" type="checkbox"><label for="vqadd_s64"><div>int64x1_t <b><b>vqadd_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_s64" type="checkbox"><label for="vqaddq_s64"><div>int64x2_t <b><b>vqaddq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_u8" type="checkbox"><label for="vqadd_u8"><div>uint8x8_t <b><b>vqadd_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_u8" type="checkbox"><label for="vqaddq_u8"><div>uint8x16_t <b><b>vqaddq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_u16" type="checkbox"><label for="vqadd_u16"><div>uint16x4_t <b><b>vqadd_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_u16" type="checkbox"><label for="vqaddq_u16"><div>uint16x8_t <b><b>vqaddq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_u32" type="checkbox"><label for="vqadd_u32"><div>uint32x2_t <b><b>vqadd_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_u32" type="checkbox"><label for="vqaddq_u32"><div>uint32x4_t <b><b>vqaddq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqadd_u64" type="checkbox"><label for="vqadd_u64"><div>uint64x1_t <b><b>vqadd_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddq_u64" type="checkbox"><label for="vqaddq_u64"><div>uint64x2_t <b><b>vqaddq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqaddb_s8" type="checkbox"><label for="vqaddb_s8"><div>int8_t <b><b>vqaddb_s8</b></b> (int8_t a, int8_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqaddh_s16" type="checkbox"><label for="vqaddh_s16"><div>int16_t <b><b>vqaddh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqadds_s32" type="checkbox"><label for="vqadds_s32"><div>int32_t <b><b>vqadds_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqaddd_s64" type="checkbox"><label for="vqaddd_s64"><div>int64_t <b><b>vqaddd_s64</b></b> (int64_t a, int64_t b)<span class="right">Signed saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqadd-signed-saturating-add">SQADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqaddb_u8" type="checkbox"><label for="vqaddb_u8"><div>uint8_t <b><b>vqaddb_u8</b></b> (uint8_t a, uint8_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqaddh_u16" type="checkbox"><label for="vqaddh_u16"><div>uint16_t <b><b>vqaddh_u16</b></b> (uint16_t a, uint16_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqadds_u32" type="checkbox"><label for="vqadds_u32"><div>uint32_t <b><b>vqadds_u32</b></b> (uint32_t a, uint32_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqaddd_u64" type="checkbox"><label for="vqaddd_u64"><div>uint64_t <b><b>vqaddd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Unsigned saturating add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&amp;FP registers, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqadd-unsigned-saturating-add">UQADD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqadd_s8" type="checkbox"><label for="vuqadd_s8"><div>int8x8_t <b><b>vuqadd_s8</b></b> (int8x8_t a, uint8x8_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddq_s8" type="checkbox"><label for="vuqaddq_s8"><div>int8x16_t <b><b>vuqaddq_s8</b></b> (int8x16_t a, uint8x16_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqadd_s16" type="checkbox"><label for="vuqadd_s16"><div>int16x4_t <b><b>vuqadd_s16</b></b> (int16x4_t a, uint16x4_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddq_s16" type="checkbox"><label for="vuqaddq_s16"><div>int16x8_t <b><b>vuqaddq_s16</b></b> (int16x8_t a, uint16x8_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqadd_s32" type="checkbox"><label for="vuqadd_s32"><div>int32x2_t <b><b>vuqadd_s32</b></b> (int32x2_t a, uint32x2_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddq_s32" type="checkbox"><label for="vuqaddq_s32"><div>int32x4_t <b><b>vuqaddq_s32</b></b> (int32x4_t a, uint32x4_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqadd_s64" type="checkbox"><label for="vuqadd_s64"><div>int64x1_t <b><b>vuqadd_s64</b></b> (int64x1_t a, uint64x1_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddq_s64" type="checkbox"><label for="vuqaddq_s64"><div>int64x2_t <b><b>vuqaddq_s64</b></b> (int64x2_t a, uint64x2_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddb_s8" type="checkbox"><label for="vuqaddb_s8"><div>int8_t <b><b>vuqaddb_s8</b></b> (int8_t a, uint8_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Bd,Bn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bd <br />
+b &rarr; Bn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddh_s16" type="checkbox"><label for="vuqaddh_s16"><div>int16_t <b><b>vuqaddh_s16</b></b> (int16_t a, uint16_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Hd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hd <br />
+b &rarr; Hn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqadds_s32" type="checkbox"><label for="vuqadds_s32"><div>int32_t <b><b>vuqadds_s32</b></b> (int32_t a, uint32_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuqaddd_s64" type="checkbox"><label for="vuqaddd_s64"><div>int64_t <b><b>vuqaddd_s64</b></b> (int64_t a, uint64_t b)<span class="right">Signed saturating accumulate of unsigned value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&amp;FP register to corresponding signed integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting signed integer values to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/suqadd-signed-saturating-accumulate-of-unsigned-value">SUQADD</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqadd_u8" type="checkbox"><label for="vsqadd_u8"><div>uint8x8_t <b><b>vsqadd_u8</b></b> (uint8x8_t a, int8x8_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting unsigned integer values, saturated to the unsigned element range, back to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddq_u8" type="checkbox"><label for="vsqaddq_u8"><div>uint8x16_t <b><b>vsqaddq_u8</b></b> (uint8x16_t a, int8x16_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting unsigned integer values, saturated to the unsigned element range, back to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqadd_u16" type="checkbox"><label for="vsqadd_u16"><div>uint16x4_t <b><b>vsqadd_u16</b></b> (uint16x4_t a, int16x4_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting unsigned integer values, saturated to the unsigned element range, back to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddq_u16" type="checkbox"><label for="vsqaddq_u16"><div>uint16x8_t <b><b>vsqaddq_u16</b></b> (uint16x8_t a, int16x8_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting unsigned integer values, saturated to the unsigned element range, back to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqadd_u32" type="checkbox"><label for="vsqadd_u32"><div>uint32x2_t <b><b>vsqadd_u32</b></b> (uint32x2_t a, int32x2_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting unsigned integer values, saturated to the unsigned element range, back to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddq_u32" type="checkbox"><label for="vsqaddq_u32"><div>uint32x4_t <b><b>vsqaddq_u32</b></b> (uint32x4_t a, int32x4_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and writes the resulting unsigned integer values, saturated to the unsigned element range, back to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqadd_u64" type="checkbox"><label for="vsqadd_u64"><div>uint64x1_t <b><b>vsqadd_u64</b></b> (uint64x1_t a, int64x1_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddq_u64" type="checkbox"><label for="vsqaddq_u64"><div>uint64x2_t <b><b>vsqaddq_u64</b></b> (uint64x2_t a, int64x2_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddb_u8" type="checkbox"><label for="vsqaddb_u8"><div>uint8_t <b><b>vsqaddb_u8</b></b> (uint8_t a, int8_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Bd,Bn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bd <br />
+b &rarr; Bn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddh_u16" type="checkbox"><label for="vsqaddh_u16"><div>uint16_t <b><b>vsqaddh_u16</b></b> (uint16_t a, int16_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Hd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hd <br />
+b &rarr; Hn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqadds_u32" type="checkbox"><label for="vsqadds_u32"><div>uint32_t <b><b>vsqadds_u32</b></b> (uint32_t a, int32_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqaddd_u64" type="checkbox"><label for="vsqaddd_u64"><div>uint64_t <b><b>vsqaddd_u64</b></b> (uint64_t a, int64_t b)<span class="right">Unsigned saturating accumulate of signed value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&amp;FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&amp;FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usqadd-unsigned-saturating-accumulate-of-signed-value">USQADD</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], !unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_s16" type="checkbox"><label for="vaddhn_s16"><div>int8x8_t <b><b>vaddhn_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_s32" type="checkbox"><label for="vaddhn_s32"><div>int16x4_t <b><b>vaddhn_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_s64" type="checkbox"><label for="vaddhn_s64"><div>int32x2_t <b><b>vaddhn_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_u16" type="checkbox"><label for="vaddhn_u16"><div>uint8x8_t <b><b>vaddhn_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_u32" type="checkbox"><label for="vaddhn_u32"><div>uint16x4_t <b><b>vaddhn_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_u64" type="checkbox"><label for="vaddhn_u64"><div>uint32x2_t <b><b>vaddhn_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_high_s16" type="checkbox"><label for="vaddhn_high_s16"><div>int8x16_t <b><b>vaddhn_high_s16</b></b> (int8x8_t r, int16x8_t a, int16x8_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_high_s32" type="checkbox"><label for="vaddhn_high_s32"><div>int16x8_t <b><b>vaddhn_high_s32</b></b> (int16x4_t r, int32x4_t a, int32x4_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_high_s64" type="checkbox"><label for="vaddhn_high_s64"><div>int32x4_t <b><b>vaddhn_high_s64</b></b> (int32x2_t r, int64x2_t a, int64x2_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_high_u16" type="checkbox"><label for="vaddhn_high_u16"><div>uint8x16_t <b><b>vaddhn_high_u16</b></b> (uint8x8_t r, uint16x8_t a, uint16x8_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_high_u32" type="checkbox"><label for="vaddhn_high_u32"><div>uint16x8_t <b><b>vaddhn_high_u32</b></b> (uint16x4_t r, uint32x4_t a, uint32x4_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddhn_high_u64" type="checkbox"><label for="vaddhn_high_u64"><div>uint32x4_t <b><b>vaddhn_high_u64</b></b> (uint32x2_t r, uint64x2_t a, uint64x2_t b)<span class="right">Add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addhn-addhn2-add-returning-high-narrow">ADDHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_s16" type="checkbox"><label for="vraddhn_s16"><div>int8x8_t <b><b>vraddhn_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_s32" type="checkbox"><label for="vraddhn_s32"><div>int16x4_t <b><b>vraddhn_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_s64" type="checkbox"><label for="vraddhn_s64"><div>int32x2_t <b><b>vraddhn_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_u16" type="checkbox"><label for="vraddhn_u16"><div>uint8x8_t <b><b>vraddhn_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_u32" type="checkbox"><label for="vraddhn_u32"><div>uint16x4_t <b><b>vraddhn_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_u64" type="checkbox"><label for="vraddhn_u64"><div>uint32x2_t <b><b>vraddhn_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_high_s16" type="checkbox"><label for="vraddhn_high_s16"><div>int8x16_t <b><b>vraddhn_high_s16</b></b> (int8x8_t r, int16x8_t a, int16x8_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_high_s32" type="checkbox"><label for="vraddhn_high_s32"><div>int16x8_t <b><b>vraddhn_high_s32</b></b> (int16x4_t r, int32x4_t a, int32x4_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_high_s64" type="checkbox"><label for="vraddhn_high_s64"><div>int32x4_t <b><b>vraddhn_high_s64</b></b> (int32x2_t r, int64x2_t a, int64x2_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_high_u16" type="checkbox"><label for="vraddhn_high_u16"><div>uint8x16_t <b><b>vraddhn_high_u16</b></b> (uint8x8_t r, uint16x8_t a, uint16x8_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_high_u32" type="checkbox"><label for="vraddhn_high_u32"><div>uint16x8_t <b><b>vraddhn_high_u32</b></b> (uint16x4_t r, uint32x4_t a, uint32x4_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vraddhn_high_u64" type="checkbox"><label for="vraddhn_high_u64"><div>uint32x4_t <b><b>vraddhn_high_u64</b></b> (uint32x2_t r, uint64x2_t a, uint64x2_t b)<span class="right">Rounding add returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&amp;FP register to the corresponding vector element in the second source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/raddhn-raddhn2-rounding-add-returning-high-narrow">RADDHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_s8" type="checkbox"><label for="vmul_s8"><div>int8x8_t <b><b>vmul_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_s8" type="checkbox"><label for="vmulq_s8"><div>int8x16_t <b><b>vmulq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_s16" type="checkbox"><label for="vmul_s16"><div>int16x4_t <b><b>vmul_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_s16" type="checkbox"><label for="vmulq_s16"><div>int16x8_t <b><b>vmulq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_s32" type="checkbox"><label for="vmul_s32"><div>int32x2_t <b><b>vmul_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_s32" type="checkbox"><label for="vmulq_s32"><div>int32x4_t <b><b>vmulq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_u8" type="checkbox"><label for="vmul_u8"><div>uint8x8_t <b><b>vmul_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_u8" type="checkbox"><label for="vmulq_u8"><div>uint8x16_t <b><b>vmulq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_u16" type="checkbox"><label for="vmul_u16"><div>uint16x4_t <b><b>vmul_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_u16" type="checkbox"><label for="vmulq_u16"><div>uint16x8_t <b><b>vmulq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_u32" type="checkbox"><label for="vmul_u32"><div>uint32x2_t <b><b>vmul_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_u32" type="checkbox"><label for="vmulq_u32"><div>uint32x4_t <b><b>vmulq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_f32" type="checkbox"><label for="vmul_f32"><div>float32x2_t <b><b>vmul_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_f32" type="checkbox"><label for="vmulq_f32"><div>float32x4_t <b><b>vmulq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_p8" type="checkbox"><label for="vmul_p8"><div>poly8x8_t <b><b>vmul_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Polynomial multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/pmul-polynomial-multiply">PMUL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_p8" type="checkbox"><label for="vmulq_p8"><div>poly8x16_t <b><b>vmulq_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Polynomial multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/pmul-polynomial-multiply">PMUL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_f64" type="checkbox"><label for="vmul_f64"><div>float64x1_t <b><b>vmul_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_f64" type="checkbox"><label for="vmulq_f64"><div>float64x2_t <b><b>vmulq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulx_f32" type="checkbox"><label for="vmulx_f32"><div>float32x2_t <b><b>vmulx_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxq_f32" type="checkbox"><label for="vmulxq_f32"><div>float32x4_t <b><b>vmulxq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulx_f64" type="checkbox"><label for="vmulx_f64"><div>float64x1_t <b><b>vmulx_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxq_f64" type="checkbox"><label for="vmulxq_f64"><div>float64x2_t <b><b>vmulxq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxs_f32" type="checkbox"><label for="vmulxs_f32"><div>float32_t <b><b>vmulxs_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxd_f64" type="checkbox"><label for="vmulxd_f64"><div>float64_t <b><b>vmulxd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulx_lane_f32" type="checkbox"><label for="vmulx_lane_f32"><div>float32x2_t <b><b>vmulx_lane_f32</b></b> (float32x2_t a, float32x2_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxq_lane_f32" type="checkbox"><label for="vmulxq_lane_f32"><div>float32x4_t <b><b>vmulxq_lane_f32</b></b> (float32x4_t a, float32x2_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulx_lane_f64" type="checkbox"><label for="vmulx_lane_f64"><div>float64x1_t <b><b>vmulx_lane_f64</b></b> (float64x1_t a, float64x1_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxq_lane_f64" type="checkbox"><label for="vmulxq_lane_f64"><div>float64x2_t <b><b>vmulxq_lane_f64</b></b> (float64x2_t a, float64x1_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxs_lane_f32" type="checkbox"><label for="vmulxs_lane_f32"><div>float32_t <b><b>vmulxs_lane_f32</b></b> (float32_t a, float32x2_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxd_lane_f64" type="checkbox"><label for="vmulxd_lane_f64"><div>float64_t <b><b>vmulxd_lane_f64</b></b> (float64_t a, float64x1_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulx_laneq_f32" type="checkbox"><label for="vmulx_laneq_f32"><div>float32x2_t <b><b>vmulx_laneq_f32</b></b> (float32x2_t a, float32x4_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxq_laneq_f32" type="checkbox"><label for="vmulxq_laneq_f32"><div>float32x4_t <b><b>vmulxq_laneq_f32</b></b> (float32x4_t a, float32x4_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulx_laneq_f64" type="checkbox"><label for="vmulx_laneq_f64"><div>float64x1_t <b><b>vmulx_laneq_f64</b></b> (float64x1_t a, float64x2_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxq_laneq_f64" type="checkbox"><label for="vmulxq_laneq_f64"><div>float64x2_t <b><b>vmulxq_laneq_f64</b></b> (float64x2_t a, float64x2_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxs_laneq_f32" type="checkbox"><label for="vmulxs_laneq_f32"><div>float32_t <b><b>vmulxs_laneq_f32</b></b> (float32_t a, float32x4_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulxd_laneq_f64" type="checkbox"><label for="vmulxd_laneq_f64"><div>float64_t <b><b>vmulxd_laneq_f64</b></b> (float64_t a, float64x2_t v, const int lane)<span class="right">Floating-point multiply extended (by element)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&amp;FP register by the specified floating-point value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmulx-by-element-floating-point-multiply-extended-by-element">FMULX</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(idxdsize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    if mulx_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulX.3" title="function: bits(N) FPMulX(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulX</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdiv_f32" type="checkbox"><label for="vdiv_f32"><div>float32x2_t <b><b>vdiv_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point divide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&amp;FP register, by the floating-point values in the corresponding elements in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fdiv-vector-floating-point-divide-vector">FDIV</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPDiv.3" title="function: bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPDiv</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdivq_f32" type="checkbox"><label for="vdivq_f32"><div>float32x4_t <b><b>vdivq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point divide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&amp;FP register, by the floating-point values in the corresponding elements in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fdiv-vector-floating-point-divide-vector">FDIV</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPDiv.3" title="function: bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPDiv</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdiv_f64" type="checkbox"><label for="vdiv_f64"><div>float64x1_t <b><b>vdiv_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point divide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&amp;FP register, by the floating-point values in the corresponding elements in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fdiv-vector-floating-point-divide-vector">FDIV</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPDiv.3" title="function: bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPDiv</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdivq_f64" type="checkbox"><label for="vdivq_f64"><div>float64x2_t <b><b>vdivq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point divide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&amp;FP register, by the floating-point values in the corresponding elements in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fdiv-vector-floating-point-divide-vector">FDIV</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPDiv.3" title="function: bits(N) FPDiv(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPDiv</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_s8" type="checkbox"><label for="vmla_s8"><div>int8x8_t <b><b>vmla_s8</b></b> (int8x8_t a, int8x8_t b, int8x8_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_s8" type="checkbox"><label for="vmlaq_s8"><div>int8x16_t <b><b>vmlaq_s8</b></b> (int8x16_t a, int8x16_t b, int8x16_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_s16" type="checkbox"><label for="vmla_s16"><div>int16x4_t <b><b>vmla_s16</b></b> (int16x4_t a, int16x4_t b, int16x4_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_s16" type="checkbox"><label for="vmlaq_s16"><div>int16x8_t <b><b>vmlaq_s16</b></b> (int16x8_t a, int16x8_t b, int16x8_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_s32" type="checkbox"><label for="vmla_s32"><div>int32x2_t <b><b>vmla_s32</b></b> (int32x2_t a, int32x2_t b, int32x2_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_s32" type="checkbox"><label for="vmlaq_s32"><div>int32x4_t <b><b>vmlaq_s32</b></b> (int32x4_t a, int32x4_t b, int32x4_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_u8" type="checkbox"><label for="vmla_u8"><div>uint8x8_t <b><b>vmla_u8</b></b> (uint8x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_u8" type="checkbox"><label for="vmlaq_u8"><div>uint8x16_t <b><b>vmlaq_u8</b></b> (uint8x16_t a, uint8x16_t b, uint8x16_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_u16" type="checkbox"><label for="vmla_u16"><div>uint16x4_t <b><b>vmla_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_u16" type="checkbox"><label for="vmlaq_u16"><div>uint16x8_t <b><b>vmlaq_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x8_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_u32" type="checkbox"><label for="vmla_u32"><div>uint32x2_t <b><b>vmla_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_u32" type="checkbox"><label for="vmlaq_u32"><div>uint32x4_t <b><b>vmlaq_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x4_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_f32" type="checkbox"><label for="vmla_f32"><div>float32x2_t <b><b>vmla_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t c)<span class="right">Floating-point multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_f32" type="checkbox"><label for="vmlaq_f32"><div>float32x4_t <b><b>vmlaq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t c)<span class="right">Floating-point multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_f64" type="checkbox"><label for="vmla_f64"><div>float64x1_t <b><b>vmla_f64</b></b> (float64x1_t a, float64x1_t b, float64x1_t c)<span class="right">Floating-point multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * c[i]) for i = 0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_f64" type="checkbox"><label for="vmlaq_f64"><div>float64x2_t <b><b>vmlaq_f64</b></b> (float64x2_t a, float64x2_t b, float64x2_t c)<span class="right">Floating-point multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_s8" type="checkbox"><label for="vmlal_s8"><div>int16x8_t <b><b>vmlal_s8</b></b> (int16x8_t a, int8x8_t b, int8x8_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_s16" type="checkbox"><label for="vmlal_s16"><div>int32x4_t <b><b>vmlal_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_s32" type="checkbox"><label for="vmlal_s32"><div>int64x2_t <b><b>vmlal_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_u8" type="checkbox"><label for="vmlal_u8"><div>uint16x8_t <b><b>vmlal_u8</b></b> (uint16x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_u16" type="checkbox"><label for="vmlal_u16"><div>uint32x4_t <b><b>vmlal_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_u32" type="checkbox"><label for="vmlal_u32"><div>uint64x2_t <b><b>vmlal_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_s8" type="checkbox"><label for="vmlal_high_s8"><div>int16x8_t <b><b>vmlal_high_s8</b></b> (int16x8_t a, int8x16_t b, int8x16_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_s16" type="checkbox"><label for="vmlal_high_s16"><div>int32x4_t <b><b>vmlal_high_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_s32" type="checkbox"><label for="vmlal_high_s32"><div>int64x2_t <b><b>vmlal_high_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_u8" type="checkbox"><label for="vmlal_high_u8"><div>uint16x8_t <b><b>vmlal_high_u8</b></b> (uint16x8_t a, uint8x16_t b, uint8x16_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_u16" type="checkbox"><label for="vmlal_high_u16"><div>uint32x4_t <b><b>vmlal_high_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x8_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_u32" type="checkbox"><label for="vmlal_high_u32"><div>uint64x2_t <b><b>vmlal_high_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x4_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_s8" type="checkbox"><label for="vmls_s8"><div>int8x8_t <b><b>vmls_s8</b></b> (int8x8_t a, int8x8_t b, int8x8_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_s8" type="checkbox"><label for="vmlsq_s8"><div>int8x16_t <b><b>vmlsq_s8</b></b> (int8x16_t a, int8x16_t b, int8x16_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_s16" type="checkbox"><label for="vmls_s16"><div>int16x4_t <b><b>vmls_s16</b></b> (int16x4_t a, int16x4_t b, int16x4_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_s16" type="checkbox"><label for="vmlsq_s16"><div>int16x8_t <b><b>vmlsq_s16</b></b> (int16x8_t a, int16x8_t b, int16x8_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_s32" type="checkbox"><label for="vmls_s32"><div>int32x2_t <b><b>vmls_s32</b></b> (int32x2_t a, int32x2_t b, int32x2_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_s32" type="checkbox"><label for="vmlsq_s32"><div>int32x4_t <b><b>vmlsq_s32</b></b> (int32x4_t a, int32x4_t b, int32x4_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_u8" type="checkbox"><label for="vmls_u8"><div>uint8x8_t <b><b>vmls_u8</b></b> (uint8x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_u8" type="checkbox"><label for="vmlsq_u8"><div>uint8x16_t <b><b>vmlsq_u8</b></b> (uint8x16_t a, uint8x16_t b, uint8x16_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_u16" type="checkbox"><label for="vmls_u16"><div>uint16x4_t <b><b>vmls_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_u16" type="checkbox"><label for="vmlsq_u16"><div>uint16x8_t <b><b>vmlsq_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x8_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_u32" type="checkbox"><label for="vmls_u32"><div>uint32x2_t <b><b>vmls_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_u32" type="checkbox"><label for="vmlsq_u32"><div>uint32x4_t <b><b>vmlsq_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x4_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_f32" type="checkbox"><label for="vmls_f32"><div>float32x2_t <b><b>vmls_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_f32" type="checkbox"><label for="vmlsq_f32"><div>float32x4_t <b><b>vmlsq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_f64" type="checkbox"><label for="vmls_f64"><div>float64x1_t <b><b>vmls_f64</b></b> (float64x1_t a, float64x1_t b, float64x1_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * c[i]) for i = 0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_f64" type="checkbox"><label for="vmlsq_f64"><div>float64x2_t <b><b>vmlsq_f64</b></b> (float64x2_t a, float64x2_t b, float64x2_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_s8" type="checkbox"><label for="vmlsl_s8"><div>int16x8_t <b><b>vmlsl_s8</b></b> (int16x8_t a, int8x8_t b, int8x8_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_s16" type="checkbox"><label for="vmlsl_s16"><div>int32x4_t <b><b>vmlsl_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_s32" type="checkbox"><label for="vmlsl_s32"><div>int64x2_t <b><b>vmlsl_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_u8" type="checkbox"><label for="vmlsl_u8"><div>uint16x8_t <b><b>vmlsl_u8</b></b> (uint16x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_u16" type="checkbox"><label for="vmlsl_u16"><div>uint32x4_t <b><b>vmlsl_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_u32" type="checkbox"><label for="vmlsl_u32"><div>uint64x2_t <b><b>vmlsl_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_s8" type="checkbox"><label for="vmlsl_high_s8"><div>int16x8_t <b><b>vmlsl_high_s8</b></b> (int16x8_t a, int8x16_t b, int8x16_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_s16" type="checkbox"><label for="vmlsl_high_s16"><div>int32x4_t <b><b>vmlsl_high_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_s32" type="checkbox"><label for="vmlsl_high_s32"><div>int64x2_t <b><b>vmlsl_high_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_u8" type="checkbox"><label for="vmlsl_high_u8"><div>uint16x8_t <b><b>vmlsl_high_u8</b></b> (uint16x8_t a, uint8x16_t b, uint8x16_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_u16" type="checkbox"><label for="vmlsl_high_u16"><div>uint32x4_t <b><b>vmlsl_high_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x8_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_u32" type="checkbox"><label for="vmlsl_high_u32"><div>uint64x2_t <b><b>vmlsl_high_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x4_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_f32" type="checkbox"><label for="vfma_f32"><div>float32x2_t <b><b>vfma_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t c)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_f32" type="checkbox"><label for="vfmaq_f32"><div>float32x4_t <b><b>vfmaq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t c)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vfma_f64" type="checkbox"><label for="vfma_f64"><div>float64x1_t <b><b>vfma_f64</b></b> (float64x1_t a, float64x1_t b, float64x1_t c)<span class="right">Floating-point fused multiply-add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&amp;FP source registers, adds the product to the value of the third SIMD&amp;FP source register, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmadd-floating-point-fused-multiply-add-scalar">FMADD</a> Dd,Dn,Dm,Da
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Da <br />
+b &rarr; Dn <br />
+c &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) result;
+bits(datasize) operanda = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[a];
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(operanda, operand1, operand2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_f64" type="checkbox"><label for="vfmaq_f64"><div>float64x2_t <b><b>vfmaq_f64</b></b> (float64x2_t a, float64x2_t b, float64x2_t c)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+c &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_lane_f32" type="checkbox"><label for="vfma_lane_f32"><div>float32x2_t <b><b>vfma_lane_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_lane_f32" type="checkbox"><label for="vfmaq_lane_f32"><div>float32x4_t <b><b>vfmaq_lane_f32</b></b> (float32x4_t a, float32x4_t b, float32x2_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_lane_f64" type="checkbox"><label for="vfma_lane_f64"><div>float64x1_t <b><b>vfma_lane_f64</b></b> (float64x1_t a, float64x1_t b, float64x1_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_lane_f64" type="checkbox"><label for="vfmaq_lane_f64"><div>float64x2_t <b><b>vfmaq_lane_f64</b></b> (float64x2_t a, float64x2_t b, float64x1_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmas_lane_f32" type="checkbox"><label for="vfmas_lane_f32"><div>float32_t <b><b>vfmas_lane_f32</b></b> (float32_t a, float32_t b, float32x2_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmad_lane_f64" type="checkbox"><label for="vfmad_lane_f64"><div>float64_t <b><b>vfmad_lane_f64</b></b> (float64_t a, float64_t b, float64x1_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_laneq_f32" type="checkbox"><label for="vfma_laneq_f32"><div>float32x2_t <b><b>vfma_laneq_f32</b></b> (float32x2_t a, float32x2_t b, float32x4_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_laneq_f32" type="checkbox"><label for="vfmaq_laneq_f32"><div>float32x4_t <b><b>vfmaq_laneq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_laneq_f64" type="checkbox"><label for="vfma_laneq_f64"><div>float64x1_t <b><b>vfma_laneq_f64</b></b> (float64x1_t a, float64x1_t b, float64x2_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_laneq_f64" type="checkbox"><label for="vfmaq_laneq_f64"><div>float64x2_t <b><b>vfmaq_laneq_f64</b></b> (float64x2_t a, float64x2_t b, float64x2_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmas_laneq_f32" type="checkbox"><label for="vfmas_laneq_f32"><div>float32_t <b><b>vfmas_laneq_f32</b></b> (float32_t a, float32_t b, float32x4_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmad_laneq_f64" type="checkbox"><label for="vfmad_laneq_f64"><div>float64_t <b><b>vfmad_laneq_f64</b></b> (float64_t a, float64_t b, float64x2_t v, const int lane)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfms_f32" type="checkbox"><label for="vfms_f32"><div>float32x2_t <b><b>vfms_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t c)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_f32" type="checkbox"><label for="vfmsq_f32"><div>float32x4_t <b><b>vfmsq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t c)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vfms_f64" type="checkbox"><label for="vfms_f64"><div>float64x1_t <b><b>vfms_f64</b></b> (float64x1_t a, float64x1_t b, float64x1_t c)<span class="right">Floating-point fused multiply-subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&amp;FP source registers, negates the product, adds that to the value of the third SIMD&amp;FP source register, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmsub-floating-point-fused-multiply-subtract-scalar">FMSUB</a> Dd,Dn,Dm,Da
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Da <br />
+b &rarr; Dn <br />
+c &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) result;
+bits(datasize) operanda = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[a];
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(operand1);
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(operanda, operand1, operand2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_f64" type="checkbox"><label for="vfmsq_f64"><div>float64x2_t <b><b>vfmsq_f64</b></b> (float64x2_t a, float64x2_t b, float64x2_t c)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+c &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfms_lane_f32" type="checkbox"><label for="vfms_lane_f32"><div>float32x2_t <b><b>vfms_lane_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_lane_f32" type="checkbox"><label for="vfmsq_lane_f32"><div>float32x4_t <b><b>vfmsq_lane_f32</b></b> (float32x4_t a, float32x4_t b, float32x2_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfms_lane_f64" type="checkbox"><label for="vfms_lane_f64"><div>float64x1_t <b><b>vfms_lane_f64</b></b> (float64x1_t a, float64x1_t b, float64x1_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_lane_f64" type="checkbox"><label for="vfmsq_lane_f64"><div>float64x2_t <b><b>vfmsq_lane_f64</b></b> (float64x2_t a, float64x2_t b, float64x1_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmss_lane_f32" type="checkbox"><label for="vfmss_lane_f32"><div>float32_t <b><b>vfmss_lane_f32</b></b> (float32_t a, float32_t b, float32x2_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsd_lane_f64" type="checkbox"><label for="vfmsd_lane_f64"><div>float64_t <b><b>vfmsd_lane_f64</b></b> (float64_t a, float64_t b, float64x1_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfms_laneq_f32" type="checkbox"><label for="vfms_laneq_f32"><div>float32x2_t <b><b>vfms_laneq_f32</b></b> (float32x2_t a, float32x2_t b, float32x4_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_laneq_f32" type="checkbox"><label for="vfmsq_laneq_f32"><div>float32x4_t <b><b>vfmsq_laneq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfms_laneq_f64" type="checkbox"><label for="vfms_laneq_f64"><div>float64x1_t <b><b>vfms_laneq_f64</b></b> (float64x1_t a, float64x1_t b, float64x2_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_laneq_f64" type="checkbox"><label for="vfmsq_laneq_f64"><div>float64x2_t <b><b>vfmsq_laneq_f64</b></b> (float64x2_t a, float64x2_t b, float64x2_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmss_laneq_f32" type="checkbox"><label for="vfmss_laneq_f32"><div>float32_t <b><b>vfmss_laneq_f32</b></b> (float32_t a, float32_t b, float32x4_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsd_laneq_f64" type="checkbox"><label for="vfmsd_laneq_f64"><div>float64_t <b><b>vfmsd_laneq_f64</b></b> (float64_t a, float64_t b, float64x2_t v, const int lane)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_s16" type="checkbox"><label for="vqdmulh_s16"><div>int16x4_t <b><b>vqdmulh_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_s16" type="checkbox"><label for="vqdmulhq_s16"><div>int16x8_t <b><b>vqdmulhq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_s32" type="checkbox"><label for="vqdmulh_s32"><div>int32x2_t <b><b>vqdmulh_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_s32" type="checkbox"><label for="vqdmulhq_s32"><div>int32x4_t <b><b>vqdmulhq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhh_s16" type="checkbox"><label for="vqdmulhh_s16"><div>int16_t <b><b>vqdmulhh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhs_s32" type="checkbox"><label for="vqdmulhs_s32"><div>int32_t <b><b>vqdmulhs_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_s16" type="checkbox"><label for="vqrdmulh_s16"><div>int16x4_t <b><b>vqrdmulh_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see SQDMULH.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_s16" type="checkbox"><label for="vqrdmulhq_s16"><div>int16x8_t <b><b>vqrdmulhq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see SQDMULH.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_s32" type="checkbox"><label for="vqrdmulh_s32"><div>int32x2_t <b><b>vqrdmulh_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see SQDMULH.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_s32" type="checkbox"><label for="vqrdmulhq_s32"><div>int32x4_t <b><b>vqrdmulhq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see SQDMULH.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhh_s16" type="checkbox"><label for="vqrdmulhh_s16"><div>int16_t <b><b>vqrdmulhh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhs_s32" type="checkbox"><label for="vqrdmulhs_s32"><div>int32_t <b><b>vqrdmulhs_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_s16" type="checkbox"><label for="vqdmlal_s16"><div>int32x4_t <b><b>vqdmlal_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_s32" type="checkbox"><label for="vqdmlal_s32"><div>int64x2_t <b><b>vqdmlal_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlalh_s16" type="checkbox"><label for="vqdmlalh_s16"><div>int32_t <b><b>vqdmlalh_s16</b></b> (int32_t a, int16_t b, int16_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Sd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Hn <br />
+c &rarr; Hm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlals_s32" type="checkbox"><label for="vqdmlals_s32"><div>int64_t <b><b>vqdmlals_s32</b></b> (int64_t a, int32_t b, int32_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Dd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Sn <br />
+c &rarr; Sm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_s16" type="checkbox"><label for="vqdmlal_high_s16"><div>int32x4_t <b><b>vqdmlal_high_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_s32" type="checkbox"><label for="vqdmlal_high_s32"><div>int64x2_t <b><b>vqdmlal_high_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_s16" type="checkbox"><label for="vqdmlsl_s16"><div>int32x4_t <b><b>vqdmlsl_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_s32" type="checkbox"><label for="vqdmlsl_s32"><div>int64x2_t <b><b>vqdmlsl_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlslh_s16" type="checkbox"><label for="vqdmlslh_s16"><div>int32_t <b><b>vqdmlslh_s16</b></b> (int32_t a, int16_t b, int16_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Sd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Hn <br />
+c &rarr; Hm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsls_s32" type="checkbox"><label for="vqdmlsls_s32"><div>int64_t <b><b>vqdmlsls_s32</b></b> (int64_t a, int32_t b, int32_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Dd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Sn <br />
+c &rarr; Sm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_s16" type="checkbox"><label for="vqdmlsl_high_s16"><div>int32x4_t <b><b>vqdmlsl_high_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_s32" type="checkbox"><label for="vqdmlsl_high_s32"><div>int64x2_t <b><b>vqdmlsl_high_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_s8" type="checkbox"><label for="vmull_s8"><div>int16x8_t <b><b>vmull_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_s16" type="checkbox"><label for="vmull_s16"><div>int32x4_t <b><b>vmull_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_s32" type="checkbox"><label for="vmull_s32"><div>int64x2_t <b><b>vmull_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_u8" type="checkbox"><label for="vmull_u8"><div>uint16x8_t <b><b>vmull_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_u16" type="checkbox"><label for="vmull_u16"><div>uint32x4_t <b><b>vmull_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_u32" type="checkbox"><label for="vmull_u32"><div>uint64x2_t <b><b>vmull_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_p8" type="checkbox"><label for="vmull_p8"><div>poly16x8_t <b><b>vmull_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Polynomial multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/pmull-pmull2-polynomial-multiply-long">PMULL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_s8" type="checkbox"><label for="vmull_high_s8"><div>int16x8_t <b><b>vmull_high_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_s16" type="checkbox"><label for="vmull_high_s16"><div>int32x4_t <b><b>vmull_high_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_s32" type="checkbox"><label for="vmull_high_s32"><div>int64x2_t <b><b>vmull_high_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_u8" type="checkbox"><label for="vmull_high_u8"><div>uint16x8_t <b><b>vmull_high_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_u16" type="checkbox"><label for="vmull_high_u16"><div>uint32x4_t <b><b>vmull_high_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_u32" type="checkbox"><label for="vmull_high_u32"><div>uint64x2_t <b><b>vmull_high_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_p8" type="checkbox"><label for="vmull_high_p8"><div>poly16x8_t <b><b>vmull_high_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Polynomial multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/pmull-pmull2-polynomial-multiply-long">PMULL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_s16" type="checkbox"><label for="vqdmull_s16"><div>int32x4_t <b><b>vqdmull_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_s32" type="checkbox"><label for="vqdmull_s32"><div>int64x2_t <b><b>vqdmull_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmullh_s16" type="checkbox"><label for="vqdmullh_s16"><div>int32_t <b><b>vqdmullh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Sd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulls_s32" type="checkbox"><label for="vqdmulls_s32"><div>int64_t <b><b>vqdmulls_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Dd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_s16" type="checkbox"><label for="vqdmull_high_s16"><div>int32x4_t <b><b>vqdmull_high_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_s32" type="checkbox"><label for="vqdmull_high_s32"><div>int64x2_t <b><b>vqdmull_high_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsub_s8" type="checkbox"><label for="vsub_s8"><div>int8x8_t <b><b>vsub_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_s8" type="checkbox"><label for="vsubq_s8"><div>int8x16_t <b><b>vsubq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_s16" type="checkbox"><label for="vsub_s16"><div>int16x4_t <b><b>vsub_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_s16" type="checkbox"><label for="vsubq_s16"><div>int16x8_t <b><b>vsubq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_s32" type="checkbox"><label for="vsub_s32"><div>int32x2_t <b><b>vsub_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_s32" type="checkbox"><label for="vsubq_s32"><div>int32x4_t <b><b>vsubq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_s64" type="checkbox"><label for="vsub_s64"><div>int64x1_t <b><b>vsub_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_s64" type="checkbox"><label for="vsubq_s64"><div>int64x2_t <b><b>vsubq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_u8" type="checkbox"><label for="vsub_u8"><div>uint8x8_t <b><b>vsub_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_u8" type="checkbox"><label for="vsubq_u8"><div>uint8x16_t <b><b>vsubq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_u16" type="checkbox"><label for="vsub_u16"><div>uint16x4_t <b><b>vsub_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_u16" type="checkbox"><label for="vsubq_u16"><div>uint16x8_t <b><b>vsubq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_u32" type="checkbox"><label for="vsub_u32"><div>uint32x2_t <b><b>vsub_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_u32" type="checkbox"><label for="vsubq_u32"><div>uint32x4_t <b><b>vsubq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_u64" type="checkbox"><label for="vsub_u64"><div>uint64x1_t <b><b>vsub_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_u64" type="checkbox"><label for="vsubq_u64"><div>uint64x2_t <b><b>vsubq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_f32" type="checkbox"><label for="vsub_f32"><div>float32x2_t <b><b>vsub_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register, from the corresponding elements in the vector in the first source SIMD&amp;FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsub-vector-floating-point-subtract-vector">FSUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_f32" type="checkbox"><label for="vsubq_f32"><div>float32x4_t <b><b>vsubq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register, from the corresponding elements in the vector in the first source SIMD&amp;FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsub-vector-floating-point-subtract-vector">FSUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsub_f64" type="checkbox"><label for="vsub_f64"><div>float64x1_t <b><b>vsub_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register, from the corresponding elements in the vector in the first source SIMD&amp;FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsub-vector-floating-point-subtract-vector">FSUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubq_f64" type="checkbox"><label for="vsubq_f64"><div>float64x2_t <b><b>vsubq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register, from the corresponding elements in the vector in the first source SIMD&amp;FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsub-vector-floating-point-subtract-vector">FSUB</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubd_s64" type="checkbox"><label for="vsubd_s64"><div>int64_t <b><b>vsubd_s64</b></b> (int64_t a, int64_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubd_u64" type="checkbox"><label for="vsubd_u64"><div>uint64_t <b><b>vsubd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract (vector). This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sub-vector-subtract-vector">SUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 - element2;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_s8" type="checkbox"><label for="vsubl_s8"><div>int16x8_t <b><b>vsubl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubl-ssubl2-signed-subtract-long">SSUBL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_s16" type="checkbox"><label for="vsubl_s16"><div>int32x4_t <b><b>vsubl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubl-ssubl2-signed-subtract-long">SSUBL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_s32" type="checkbox"><label for="vsubl_s32"><div>int64x2_t <b><b>vsubl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubl-ssubl2-signed-subtract-long">SSUBL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_u8" type="checkbox"><label for="vsubl_u8"><div>uint16x8_t <b><b>vsubl_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubl-usubl2-unsigned-subtract-long">USUBL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_u16" type="checkbox"><label for="vsubl_u16"><div>uint32x4_t <b><b>vsubl_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubl-usubl2-unsigned-subtract-long">USUBL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_u32" type="checkbox"><label for="vsubl_u32"><div>uint64x2_t <b><b>vsubl_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubl-usubl2-unsigned-subtract-long">USUBL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_high_s8" type="checkbox"><label for="vsubl_high_s8"><div>int16x8_t <b><b>vsubl_high_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubl-ssubl2-signed-subtract-long">SSUBL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_high_s16" type="checkbox"><label for="vsubl_high_s16"><div>int32x4_t <b><b>vsubl_high_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubl-ssubl2-signed-subtract-long">SSUBL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_high_s32" type="checkbox"><label for="vsubl_high_s32"><div>int64x2_t <b><b>vsubl_high_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubl-ssubl2-signed-subtract-long">SSUBL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_high_u8" type="checkbox"><label for="vsubl_high_u8"><div>uint16x8_t <b><b>vsubl_high_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubl-usubl2-unsigned-subtract-long">USUBL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_high_u16" type="checkbox"><label for="vsubl_high_u16"><div>uint32x4_t <b><b>vsubl_high_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubl-usubl2-unsigned-subtract-long">USUBL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubl_high_u32" type="checkbox"><label for="vsubl_high_u32"><div>uint64x2_t <b><b>vsubl_high_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubl-usubl2-unsigned-subtract-long">USUBL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_s8" type="checkbox"><label for="vsubw_s8"><div>int16x8_t <b><b>vsubw_s8</b></b> (int16x8_t a, int8x8_t b)<span class="right">Signed subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubw-ssubw2-signed-subtract-wide">SSUBW</a> Vd.8H,Vn.8H,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_s16" type="checkbox"><label for="vsubw_s16"><div>int32x4_t <b><b>vsubw_s16</b></b> (int32x4_t a, int16x4_t b)<span class="right">Signed subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubw-ssubw2-signed-subtract-wide">SSUBW</a> Vd.4S,Vn.4S,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_s32" type="checkbox"><label for="vsubw_s32"><div>int64x2_t <b><b>vsubw_s32</b></b> (int64x2_t a, int32x2_t b)<span class="right">Signed subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubw-ssubw2-signed-subtract-wide">SSUBW</a> Vd.2D,Vn.2D,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_u8" type="checkbox"><label for="vsubw_u8"><div>uint16x8_t <b><b>vsubw_u8</b></b> (uint16x8_t a, uint8x8_t b)<span class="right">Unsigned subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubw-usubw2-unsigned-subtract-wide">USUBW</a> Vd.8H,Vn.8H,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_u16" type="checkbox"><label for="vsubw_u16"><div>uint32x4_t <b><b>vsubw_u16</b></b> (uint32x4_t a, uint16x4_t b)<span class="right">Unsigned subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubw-usubw2-unsigned-subtract-wide">USUBW</a> Vd.4S,Vn.4S,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_u32" type="checkbox"><label for="vsubw_u32"><div>uint64x2_t <b><b>vsubw_u32</b></b> (uint64x2_t a, uint32x2_t b)<span class="right">Unsigned subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubw-usubw2-unsigned-subtract-wide">USUBW</a> Vd.2D,Vn.2D,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_high_s8" type="checkbox"><label for="vsubw_high_s8"><div>int16x8_t <b><b>vsubw_high_s8</b></b> (int16x8_t a, int8x16_t b)<span class="right">Signed subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubw-ssubw2-signed-subtract-wide">SSUBW2</a> Vd.8H,Vn.8H,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_high_s16" type="checkbox"><label for="vsubw_high_s16"><div>int32x4_t <b><b>vsubw_high_s16</b></b> (int32x4_t a, int16x8_t b)<span class="right">Signed subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubw-ssubw2-signed-subtract-wide">SSUBW2</a> Vd.4S,Vn.4S,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_high_s32" type="checkbox"><label for="vsubw_high_s32"><div>int64x2_t <b><b>vsubw_high_s32</b></b> (int64x2_t a, int32x4_t b)<span class="right">Signed subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssubw-ssubw2-signed-subtract-wide">SSUBW2</a> Vd.2D,Vn.2D,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_high_u8" type="checkbox"><label for="vsubw_high_u8"><div>uint16x8_t <b><b>vsubw_high_u8</b></b> (uint16x8_t a, uint8x16_t b)<span class="right">Unsigned subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubw-usubw2-unsigned-subtract-wide">USUBW2</a> Vd.8H,Vn.8H,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_high_u16" type="checkbox"><label for="vsubw_high_u16"><div>uint32x4_t <b><b>vsubw_high_u16</b></b> (uint32x4_t a, uint16x8_t b)<span class="right">Unsigned subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubw-usubw2-unsigned-subtract-wide">USUBW2</a> Vd.4S,Vn.4S,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubw_high_u32" type="checkbox"><label for="vsubw_high_u32"><div>uint64x2_t <b><b>vsubw_high_u32</b></b> (uint64x2_t a, uint32x4_t b)<span class="right">Unsigned subtract wide</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the result in a vector, and writes the vector to the SIMD&amp;FP destination register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usubw-usubw2-unsigned-subtract-wide">USUBW2</a> Vd.2D,Vn.2D,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = sum&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vhsub_s8" type="checkbox"><label for="vhsub_s8"><div>int8x8_t <b><b>vhsub_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register from the corresponding elements in the vector in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shsub-signed-halving-subtract">SHSUB</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsubq_s8" type="checkbox"><label for="vhsubq_s8"><div>int8x16_t <b><b>vhsubq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register from the corresponding elements in the vector in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shsub-signed-halving-subtract">SHSUB</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsub_s16" type="checkbox"><label for="vhsub_s16"><div>int16x4_t <b><b>vhsub_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register from the corresponding elements in the vector in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shsub-signed-halving-subtract">SHSUB</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsubq_s16" type="checkbox"><label for="vhsubq_s16"><div>int16x8_t <b><b>vhsubq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register from the corresponding elements in the vector in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shsub-signed-halving-subtract">SHSUB</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsub_s32" type="checkbox"><label for="vhsub_s32"><div>int32x2_t <b><b>vhsub_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register from the corresponding elements in the vector in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shsub-signed-halving-subtract">SHSUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsubq_s32" type="checkbox"><label for="vhsubq_s32"><div>int32x4_t <b><b>vhsubq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&amp;FP register from the corresponding elements in the vector in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shsub-signed-halving-subtract">SHSUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsub_u8" type="checkbox"><label for="vhsub_u8"><div>uint8x8_t <b><b>vhsub_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&amp;FP register from the corresponding vector elements in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhsub-unsigned-halving-subtract">UHSUB</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsubq_u8" type="checkbox"><label for="vhsubq_u8"><div>uint8x16_t <b><b>vhsubq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&amp;FP register from the corresponding vector elements in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhsub-unsigned-halving-subtract">UHSUB</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsub_u16" type="checkbox"><label for="vhsub_u16"><div>uint16x4_t <b><b>vhsub_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&amp;FP register from the corresponding vector elements in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhsub-unsigned-halving-subtract">UHSUB</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsubq_u16" type="checkbox"><label for="vhsubq_u16"><div>uint16x8_t <b><b>vhsubq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&amp;FP register from the corresponding vector elements in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhsub-unsigned-halving-subtract">UHSUB</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsub_u32" type="checkbox"><label for="vhsub_u32"><div>uint32x2_t <b><b>vhsub_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&amp;FP register from the corresponding vector elements in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhsub-unsigned-halving-subtract">UHSUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vhsubq_u32" type="checkbox"><label for="vhsubq_u32"><div>uint32x4_t <b><b>vhsubq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned halving subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&amp;FP register from the corresponding vector elements in the first source SIMD&amp;FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uhsub-unsigned-halving-subtract">UHSUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = diff&lt;esize:1&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_s8" type="checkbox"><label for="vqsub_s8"><div>int8x8_t <b><b>vqsub_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_s8" type="checkbox"><label for="vqsubq_s8"><div>int8x16_t <b><b>vqsubq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_s16" type="checkbox"><label for="vqsub_s16"><div>int16x4_t <b><b>vqsub_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_s16" type="checkbox"><label for="vqsubq_s16"><div>int16x8_t <b><b>vqsubq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_s32" type="checkbox"><label for="vqsub_s32"><div>int32x2_t <b><b>vqsub_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_s32" type="checkbox"><label for="vqsubq_s32"><div>int32x4_t <b><b>vqsubq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_s64" type="checkbox"><label for="vqsub_s64"><div>int64x1_t <b><b>vqsub_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_s64" type="checkbox"><label for="vqsubq_s64"><div>int64x2_t <b><b>vqsubq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_u8" type="checkbox"><label for="vqsub_u8"><div>uint8x8_t <b><b>vqsub_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_u8" type="checkbox"><label for="vqsubq_u8"><div>uint8x16_t <b><b>vqsubq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_u16" type="checkbox"><label for="vqsub_u16"><div>uint16x4_t <b><b>vqsub_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_u16" type="checkbox"><label for="vqsubq_u16"><div>uint16x8_t <b><b>vqsubq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_u32" type="checkbox"><label for="vqsub_u32"><div>uint32x2_t <b><b>vqsub_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_u32" type="checkbox"><label for="vqsubq_u32"><div>uint32x4_t <b><b>vqsubq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsub_u64" type="checkbox"><label for="vqsub_u64"><div>uint64x1_t <b><b>vqsub_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubq_u64" type="checkbox"><label for="vqsubq_u64"><div>uint64x2_t <b><b>vqsubq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqsubb_s8" type="checkbox"><label for="vqsubb_s8"><div>int8_t <b><b>vqsubb_s8</b></b> (int8_t a, int8_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubh_s16" type="checkbox"><label for="vqsubh_s16"><div>int16_t <b><b>vqsubh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubs_s32" type="checkbox"><label for="vqsubs_s32"><div>int32_t <b><b>vqsubs_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubd_s64" type="checkbox"><label for="vqsubd_s64"><div>int64_t <b><b>vqsubd_s64</b></b> (int64_t a, int64_t b)<span class="right">Signed saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqsub-signed-saturating-subtract">SQSUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubb_u8" type="checkbox"><label for="vqsubb_u8"><div>uint8_t <b><b>vqsubb_u8</b></b> (uint8_t a, uint8_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubh_u16" type="checkbox"><label for="vqsubh_u16"><div>uint16_t <b><b>vqsubh_u16</b></b> (uint16_t a, uint16_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubs_u32" type="checkbox"><label for="vqsubs_u32"><div>uint32_t <b><b>vqsubs_u32</b></b> (uint32_t a, uint32_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqsubd_u64" type="checkbox"><label for="vqsubd_u64"><div>uint64_t <b><b>vqsubd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Unsigned saturating subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&amp;FP register from the corresponding element values of the first source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqsub-unsigned-saturating-subtract">UQSUB</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_s16" type="checkbox"><label for="vsubhn_s16"><div>int8x8_t <b><b>vsubhn_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_s32" type="checkbox"><label for="vsubhn_s32"><div>int16x4_t <b><b>vsubhn_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_s64" type="checkbox"><label for="vsubhn_s64"><div>int32x2_t <b><b>vsubhn_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_u16" type="checkbox"><label for="vsubhn_u16"><div>uint8x8_t <b><b>vsubhn_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. For this intrinsic, all the values are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_u32" type="checkbox"><label for="vsubhn_u32"><div>uint16x4_t <b><b>vsubhn_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. For this intrinsic, all the values are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_u64" type="checkbox"><label for="vsubhn_u64"><div>uint32x2_t <b><b>vsubhn_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. For this intrinsic, all the values are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_high_s16" type="checkbox"><label for="vsubhn_high_s16"><div>int8x16_t <b><b>vsubhn_high_s16</b></b> (int8x8_t r, int16x8_t a, int16x8_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_high_s32" type="checkbox"><label for="vsubhn_high_s32"><div>int16x8_t <b><b>vsubhn_high_s32</b></b> (int16x4_t r, int32x4_t a, int32x4_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_high_s64" type="checkbox"><label for="vsubhn_high_s64"><div>int32x4_t <b><b>vsubhn_high_s64</b></b> (int32x2_t r, int64x2_t a, int64x2_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_high_u16" type="checkbox"><label for="vsubhn_high_u16"><div>uint8x16_t <b><b>vsubhn_high_u16</b></b> (uint8x8_t r, uint16x8_t a, uint16x8_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_high_u32" type="checkbox"><label for="vsubhn_high_u32"><div>uint16x8_t <b><b>vsubhn_high_u32</b></b> (uint16x4_t r, uint32x4_t a, uint32x4_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsubhn_high_u64" type="checkbox"><label for="vsubhn_high_u64"><div>uint32x4_t <b><b>vsubhn_high_u64</b></b> (uint32x2_t r, uint64x2_t a, uint64x2_t b)<span class="right">Subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&amp;FP register from the corresponding vector element in the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/subhn-subhn2-subtract-returning-high-narrow">SUBHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_s16" type="checkbox"><label for="vrsubhn_s16"><div>int8x8_t <b><b>vrsubhn_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_s32" type="checkbox"><label for="vrsubhn_s32"><div>int16x4_t <b><b>vrsubhn_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_s64" type="checkbox"><label for="vrsubhn_s64"><div>int32x2_t <b><b>vrsubhn_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_u16" type="checkbox"><label for="vrsubhn_u16"><div>uint8x8_t <b><b>vrsubhn_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN</a> Vd.8B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_u32" type="checkbox"><label for="vrsubhn_u32"><div>uint16x4_t <b><b>vrsubhn_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN</a> Vd.4H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_u64" type="checkbox"><label for="vrsubhn_u64"><div>uint32x2_t <b><b>vrsubhn_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN</a> Vd.2S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_high_s16" type="checkbox"><label for="vrsubhn_high_s16"><div>int8x16_t <b><b>vrsubhn_high_s16</b></b> (int8x8_t r, int16x8_t a, int16x8_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_high_s32" type="checkbox"><label for="vrsubhn_high_s32"><div>int16x8_t <b><b>vrsubhn_high_s32</b></b> (int16x4_t r, int32x4_t a, int32x4_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_high_s64" type="checkbox"><label for="vrsubhn_high_s64"><div>int32x4_t <b><b>vrsubhn_high_s64</b></b> (int32x2_t r, int64x2_t a, int64x2_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_high_u16" type="checkbox"><label for="vrsubhn_high_u16"><div>uint8x16_t <b><b>vrsubhn_high_u16</b></b> (uint8x8_t r, uint16x8_t a, uint16x8_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN2</a> Vd.16B,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_high_u32" type="checkbox"><label for="vrsubhn_high_u32"><div>uint16x8_t <b><b>vrsubhn_high_u32</b></b> (uint16x4_t r, uint32x4_t a, uint32x4_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN2</a> Vd.8H,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsubhn_high_u64" type="checkbox"><label for="vrsubhn_high_u64"><div>uint32x4_t <b><b>vrsubhn_high_u64</b></b> (uint32x2_t r, uint64x2_t a, uint64x2_t b)<span class="right">Rounding subtract returning high narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&amp;FP register from the corresponding vector element of the first source SIMD&amp;FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rsubhn-rsubhn2-rounding-subtract-returning-high-narrow">RSUBHN2</a> Vd.4S,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(2*datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if round then 1 &lt;&lt; (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 2*esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = sum&lt;2*esize-1:esize&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceq_s8" type="checkbox"><label for="vceq_s8"><div>uint8x8_t <b><b>vceq_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_s8" type="checkbox"><label for="vceqq_s8"><div>uint8x16_t <b><b>vceqq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_s16" type="checkbox"><label for="vceq_s16"><div>uint16x4_t <b><b>vceq_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_s16" type="checkbox"><label for="vceqq_s16"><div>uint16x8_t <b><b>vceqq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_s32" type="checkbox"><label for="vceq_s32"><div>uint32x2_t <b><b>vceq_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_s32" type="checkbox"><label for="vceqq_s32"><div>uint32x4_t <b><b>vceqq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_u8" type="checkbox"><label for="vceq_u8"><div>uint8x8_t <b><b>vceq_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_u8" type="checkbox"><label for="vceqq_u8"><div>uint8x16_t <b><b>vceqq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_u16" type="checkbox"><label for="vceq_u16"><div>uint16x4_t <b><b>vceq_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_u16" type="checkbox"><label for="vceqq_u16"><div>uint16x8_t <b><b>vceqq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_u32" type="checkbox"><label for="vceq_u32"><div>uint32x2_t <b><b>vceq_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_u32" type="checkbox"><label for="vceqq_u32"><div>uint32x4_t <b><b>vceqq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_f32" type="checkbox"><label for="vceq_f32"><div>uint32x2_t <b><b>vceq_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point compare equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&amp;FP register with the corresponding floating-point value from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_f32" type="checkbox"><label for="vceqq_f32"><div>uint32x4_t <b><b>vceqq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point compare equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&amp;FP register, with the corresponding floating-point value from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_p8" type="checkbox"><label for="vceq_p8"><div>uint8x8_t <b><b>vceq_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_p8" type="checkbox"><label for="vceqq_p8"><div>uint8x16_t <b><b>vceqq_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_s64" type="checkbox"><label for="vceq_s64"><div>uint64x1_t <b><b>vceq_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_s64" type="checkbox"><label for="vceqq_s64"><div>uint64x2_t <b><b>vceqq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceq_u64" type="checkbox"><label for="vceq_u64"><div>uint64x1_t <b><b>vceq_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_u64" type="checkbox"><label for="vceqq_u64"><div>uint64x2_t <b><b>vceqq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceq_p64" type="checkbox"><label for="vceq_p64"><div>uint64x1_t <b><b>vceq_p64</b></b> (poly64x1_t a, poly64x1_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_p64" type="checkbox"><label for="vceqq_p64"><div>uint64x2_t <b><b>vceqq_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceq_f64" type="checkbox"><label for="vceq_f64"><div>uint64x1_t <b><b>vceq_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point compare equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&amp;FP register with the corresponding floating-point value from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqq_f64" type="checkbox"><label for="vceqq_f64"><div>uint64x2_t <b><b>vceqq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point compare equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&amp;FP register with the corresponding floating-point value from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqd_s64" type="checkbox"><label for="vceqd_s64"><div>uint64_t <b><b>vceqd_s64</b></b> (int64_t a, int64_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-register-compare-bitwise-equal-vector">CMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqd_u64" type="checkbox"><label for="vceqd_u64"><div>uint64_t <b><b>vceqd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Compare bitwise equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&amp;FP register with the corresponding vector element from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-register-compare-bitwise-equal-vector">CMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqs_f32" type="checkbox"><label for="vceqs_f32"><div>uint32_t <b><b>vceqs_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point compare equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&amp;FP register with the corresponding floating-point value from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-register-floating-point-compare-equal-vector">FCMEQ</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqd_f64" type="checkbox"><label for="vceqd_f64"><div>uint64_t <b><b>vceqd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point compare equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&amp;FP register with the corresponding floating-point value from the second source SIMD&amp;FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-register-floating-point-compare-equal-vector">FCMEQ</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_s8" type="checkbox"><label for="vceqz_s8"><div>uint8x8_t <b><b>vceqz_s8</b></b> (int8x8_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_s8" type="checkbox"><label for="vceqzq_s8"><div>uint8x16_t <b><b>vceqzq_s8</b></b> (int8x16_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_s16" type="checkbox"><label for="vceqz_s16"><div>uint16x4_t <b><b>vceqz_s16</b></b> (int16x4_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4H,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_s16" type="checkbox"><label for="vceqzq_s16"><div>uint16x8_t <b><b>vceqzq_s16</b></b> (int16x8_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8H,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_s32" type="checkbox"><label for="vceqz_s32"><div>uint32x2_t <b><b>vceqz_s32</b></b> (int32x2_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_s32" type="checkbox"><label for="vceqzq_s32"><div>uint32x4_t <b><b>vceqzq_s32</b></b> (int32x4_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_u8" type="checkbox"><label for="vceqz_u8"><div>uint8x8_t <b><b>vceqz_u8</b></b> (uint8x8_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_u8" type="checkbox"><label for="vceqzq_u8"><div>uint8x16_t <b><b>vceqzq_u8</b></b> (uint8x16_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_u16" type="checkbox"><label for="vceqz_u16"><div>uint16x4_t <b><b>vceqz_u16</b></b> (uint16x4_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4H,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_u16" type="checkbox"><label for="vceqzq_u16"><div>uint16x8_t <b><b>vceqzq_u16</b></b> (uint16x8_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8H,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_u32" type="checkbox"><label for="vceqz_u32"><div>uint32x2_t <b><b>vceqz_u32</b></b> (uint32x2_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_u32" type="checkbox"><label for="vceqzq_u32"><div>uint32x4_t <b><b>vceqzq_u32</b></b> (uint32x4_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_f32" type="checkbox"><label for="vceqz_f32"><div>uint32x2_t <b><b>vceqz_f32</b></b> (float32x2_t a)<span class="right">Floating-point compare equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_f32" type="checkbox"><label for="vceqzq_f32"><div>uint32x4_t <b><b>vceqzq_f32</b></b> (float32x4_t a)<span class="right">Floating-point compare equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_p8" type="checkbox"><label for="vceqz_p8"><div>uint8x8_t <b><b>vceqz_p8</b></b> (poly8x8_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_p8" type="checkbox"><label for="vceqzq_p8"><div>uint8x16_t <b><b>vceqzq_p8</b></b> (poly8x16_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_s64" type="checkbox"><label for="vceqz_s64"><div>uint64x1_t <b><b>vceqz_s64</b></b> (int64x1_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_s64" type="checkbox"><label for="vceqzq_s64"><div>uint64x2_t <b><b>vceqzq_s64</b></b> (int64x2_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_u64" type="checkbox"><label for="vceqz_u64"><div>uint64x1_t <b><b>vceqz_u64</b></b> (uint64x1_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_u64" type="checkbox"><label for="vceqzq_u64"><div>uint64x2_t <b><b>vceqzq_u64</b></b> (uint64x2_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_p64" type="checkbox"><label for="vceqz_p64"><div>uint64x1_t <b><b>vceqz_p64</b></b> (poly64x1_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_p64" type="checkbox"><label for="vceqzq_p64"><div>uint64x2_t <b><b>vceqzq_p64</b></b> (poly64x2_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vceqz_f64" type="checkbox"><label for="vceqz_f64"><div>uint64x1_t <b><b>vceqz_f64</b></b> (float64x1_t a)<span class="right">Floating-point compare equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzq_f64" type="checkbox"><label for="vceqzq_f64"><div>uint64x2_t <b><b>vceqzq_f64</b></b> (float64x2_t a)<span class="right">Floating-point compare equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzd_s64" type="checkbox"><label for="vceqzd_s64"><div>uint64_t <b><b>vceqzd_s64</b></b> (int64_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzd_u64" type="checkbox"><label for="vceqzd_u64"><div>uint64_t <b><b>vceqzd_u64</b></b> (uint64_t a)<span class="right">Compare bitwise equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmeq-zero-compare-bitwise-equal-to-zero-vector">CMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzs_f32" type="checkbox"><label for="vceqzs_f32"><div>uint32_t <b><b>vceqzs_f32</b></b> (float32_t a)<span class="right">Floating-point compare equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Sd,Sn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vceqzd_f64" type="checkbox"><label for="vceqzd_f64"><div>uint64_t <b><b>vceqzd_f64</b></b> (float64_t a)<span class="right">Floating-point compare equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmeq-zero-floating-point-compare-equal-to-zero-vector">FCMEQ</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcge_s8" type="checkbox"><label for="vcge_s8"><div>uint8x8_t <b><b>vcge_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.8B,Vm.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_s8" type="checkbox"><label for="vcgeq_s8"><div>uint8x16_t <b><b>vcgeq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.16B,Vm.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_s16" type="checkbox"><label for="vcge_s16"><div>uint16x4_t <b><b>vcge_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.4H,Vm.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_s16" type="checkbox"><label for="vcgeq_s16"><div>uint16x8_t <b><b>vcgeq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Vd.8H,Vm.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_s32" type="checkbox"><label for="vcge_s32"><div>uint32x2_t <b><b>vcge_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_s32" type="checkbox"><label for="vcgeq_s32"><div>uint32x4_t <b><b>vcgeq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_u8" type="checkbox"><label for="vcge_u8"><div>uint8x8_t <b><b>vcge_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.8B,Vm.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_u8" type="checkbox"><label for="vcgeq_u8"><div>uint8x16_t <b><b>vcgeq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.16B,Vm.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_u16" type="checkbox"><label for="vcge_u16"><div>uint16x4_t <b><b>vcge_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.4H,Vm.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_u16" type="checkbox"><label for="vcgeq_u16"><div>uint16x8_t <b><b>vcgeq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.8H,Vm.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_u32" type="checkbox"><label for="vcge_u32"><div>uint32x2_t <b><b>vcge_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_u32" type="checkbox"><label for="vcgeq_u32"><div>uint32x4_t <b><b>vcgeq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_f32" type="checkbox"><label for="vcge_f32"><div>uint32x2_t <b><b>vcge_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_f32" type="checkbox"><label for="vcgeq_f32"><div>uint32x4_t <b><b>vcgeq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcge_s64" type="checkbox"><label for="vcge_s64"><div>uint64x1_t <b><b>vcge_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_s64" type="checkbox"><label for="vcgeq_s64"><div>uint64x2_t <b><b>vcgeq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcge_u64" type="checkbox"><label for="vcge_u64"><div>uint64x1_t <b><b>vcge_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_u64" type="checkbox"><label for="vcgeq_u64"><div>uint64x2_t <b><b>vcgeq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcge_f64" type="checkbox"><label for="vcge_f64"><div>uint64x1_t <b><b>vcge_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-register-floating-point-compare-greater-than-or-equal-vector">FCMGE</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgeq_f64" type="checkbox"><label for="vcgeq_f64"><div>uint64x2_t <b><b>vcgeq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-register-floating-point-compare-greater-than-or-equal-vector">FCMGE</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcged_s64" type="checkbox"><label for="vcged_s64"><div>uint64_t <b><b>vcged_s64</b></b> (int64_t a, int64_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcged_u64" type="checkbox"><label for="vcged_u64"><div>uint64_t <b><b>vcged_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcges_f32" type="checkbox"><label for="vcges_f32"><div>uint32_t <b><b>vcges_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-register-floating-point-compare-greater-than-or-equal-vector">FCMGE</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcged_f64" type="checkbox"><label for="vcged_f64"><div>uint64_t <b><b>vcged_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-register-floating-point-compare-greater-than-or-equal-vector">FCMGE</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgez_s8" type="checkbox"><label for="vcgez_s8"><div>uint8x8_t <b><b>vcgez_s8</b></b> (int8x8_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezq_s8" type="checkbox"><label for="vcgezq_s8"><div>uint8x16_t <b><b>vcgezq_s8</b></b> (int8x16_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgez_s16" type="checkbox"><label for="vcgez_s16"><div>uint16x4_t <b><b>vcgez_s16</b></b> (int16x4_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.4H,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezq_s16" type="checkbox"><label for="vcgezq_s16"><div>uint16x8_t <b><b>vcgezq_s16</b></b> (int16x8_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.8H,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgez_s32" type="checkbox"><label for="vcgez_s32"><div>uint32x2_t <b><b>vcgez_s32</b></b> (int32x2_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezq_s32" type="checkbox"><label for="vcgezq_s32"><div>uint32x4_t <b><b>vcgezq_s32</b></b> (int32x4_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgez_s64" type="checkbox"><label for="vcgez_s64"><div>uint64x1_t <b><b>vcgez_s64</b></b> (int64x1_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezq_s64" type="checkbox"><label for="vcgezq_s64"><div>uint64x2_t <b><b>vcgezq_s64</b></b> (int64x2_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgez_f32" type="checkbox"><label for="vcgez_f32"><div>uint32x2_t <b><b>vcgez_f32</b></b> (float32x2_t a)<span class="right">Floating-point compare greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezq_f32" type="checkbox"><label for="vcgezq_f32"><div>uint32x4_t <b><b>vcgezq_f32</b></b> (float32x4_t a)<span class="right">Floating-point compare greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgez_f64" type="checkbox"><label for="vcgez_f64"><div>uint64x1_t <b><b>vcgez_f64</b></b> (float64x1_t a)<span class="right">Floating-point compare greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezq_f64" type="checkbox"><label for="vcgezq_f64"><div>uint64x2_t <b><b>vcgezq_f64</b></b> (float64x2_t a)<span class="right">Floating-point compare greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezd_s64" type="checkbox"><label for="vcgezd_s64"><div>uint64_t <b><b>vcgezd_s64</b></b> (int64_t a)<span class="right">Compare signed greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezs_f32" type="checkbox"><label for="vcgezs_f32"><div>uint32_t <b><b>vcgezs_f32</b></b> (float32_t a)<span class="right">Floating-point compare greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Sd,Sn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgezd_f64" type="checkbox"><label for="vcgezd_f64"><div>uint64_t <b><b>vcgezd_f64</b></b> (float64_t a)<span class="right">Floating-point compare greater than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcle_s8" type="checkbox"><label for="vcle_s8"><div>uint8x8_t <b><b>vcle_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.8B,Vm.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_s8" type="checkbox"><label for="vcleq_s8"><div>uint8x16_t <b><b>vcleq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.16B,Vm.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_s16" type="checkbox"><label for="vcle_s16"><div>uint16x4_t <b><b>vcle_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.4H,Vm.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_s16" type="checkbox"><label for="vcleq_s16"><div>uint16x8_t <b><b>vcleq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.8H,Vm.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_s32" type="checkbox"><label for="vcle_s32"><div>uint32x2_t <b><b>vcle_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_s32" type="checkbox"><label for="vcleq_s32"><div>uint32x4_t <b><b>vcleq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-register-compare-signed-greater-than-or-equal-vector">CMGE</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_u8" type="checkbox"><label for="vcle_u8"><div>uint8x8_t <b><b>vcle_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.8B,Vm.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_u8" type="checkbox"><label for="vcleq_u8"><div>uint8x16_t <b><b>vcleq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.16B,Vm.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_u16" type="checkbox"><label for="vcle_u16"><div>uint16x4_t <b><b>vcle_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.4H,Vm.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_u16" type="checkbox"><label for="vcleq_u16"><div>uint16x8_t <b><b>vcleq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.8H,Vm.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_u32" type="checkbox"><label for="vcle_u32"><div>uint32x2_t <b><b>vcle_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_u32" type="checkbox"><label for="vcleq_u32"><div>uint32x4_t <b><b>vcleq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_f32" type="checkbox"><label for="vcle_f32"><div>uint32x2_t <b><b>vcle_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_f32" type="checkbox"><label for="vcleq_f32"><div>uint32x4_t <b><b>vcleq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcle_s64" type="checkbox"><label for="vcle_s64"><div>uint64x1_t <b><b>vcle_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_s64" type="checkbox"><label for="vcleq_s64"><div>uint64x2_t <b><b>vcleq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcle_u64" type="checkbox"><label for="vcle_u64"><div>uint64x1_t <b><b>vcle_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_u64" type="checkbox"><label for="vcleq_u64"><div>uint64x2_t <b><b>vcleq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcle_f64" type="checkbox"><label for="vcle_f64"><div>uint64x1_t <b><b>vcle_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcleq_f64" type="checkbox"><label for="vcleq_f64"><div>uint64x2_t <b><b>vcleq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcled_s64" type="checkbox"><label for="vcled_s64"><div>uint64_t <b><b>vcled_s64</b></b> (int64_t a, int64_t b)<span class="right">Compare signed greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmge-zero-compare-signed-greater-than-or-equal-to-zero-vector">CMGE</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcled_u64" type="checkbox"><label for="vcled_u64"><div>uint64_t <b><b>vcled_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Compare unsigned higher or same</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcles_f32" type="checkbox"><label for="vcles_f32"><div>uint32_t <b><b>vcles_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Sd,Sm,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcled_f64" type="checkbox"><label for="vcled_f64"><div>uint64_t <b><b>vcled_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmge-zero-floating-point-compare-greater-than-or-equal-to-zero-vector">FCMGE</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclez_s8" type="checkbox"><label for="vclez_s8"><div>uint8x8_t <b><b>vclez_s8</b></b> (int8x8_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezq_s8" type="checkbox"><label for="vclezq_s8"><div>uint8x16_t <b><b>vclezq_s8</b></b> (int8x16_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclez_s16" type="checkbox"><label for="vclez_s16"><div>uint16x4_t <b><b>vclez_s16</b></b> (int16x4_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.4H,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezq_s16" type="checkbox"><label for="vclezq_s16"><div>uint16x8_t <b><b>vclezq_s16</b></b> (int16x8_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.8H,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclez_s32" type="checkbox"><label for="vclez_s32"><div>uint32x2_t <b><b>vclez_s32</b></b> (int32x2_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezq_s32" type="checkbox"><label for="vclezq_s32"><div>uint32x4_t <b><b>vclezq_s32</b></b> (int32x4_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclez_s64" type="checkbox"><label for="vclez_s64"><div>uint64x1_t <b><b>vclez_s64</b></b> (int64x1_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezq_s64" type="checkbox"><label for="vclezq_s64"><div>uint64x2_t <b><b>vclezq_s64</b></b> (int64x2_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclez_f32" type="checkbox"><label for="vclez_f32"><div>uint32x2_t <b><b>vclez_f32</b></b> (float32x2_t a)<span class="right">Floating-point compare less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmle-zero-floating-point-compare-less-than-or-equal-to-zero-vector">FCMLE</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezq_f32" type="checkbox"><label for="vclezq_f32"><div>uint32x4_t <b><b>vclezq_f32</b></b> (float32x4_t a)<span class="right">Floating-point compare less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmle-zero-floating-point-compare-less-than-or-equal-to-zero-vector">FCMLE</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclez_f64" type="checkbox"><label for="vclez_f64"><div>uint64x1_t <b><b>vclez_f64</b></b> (float64x1_t a)<span class="right">Floating-point compare less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmle-zero-floating-point-compare-less-than-or-equal-to-zero-vector">FCMLE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezq_f64" type="checkbox"><label for="vclezq_f64"><div>uint64x2_t <b><b>vclezq_f64</b></b> (float64x2_t a)<span class="right">Floating-point compare less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmle-zero-floating-point-compare-less-than-or-equal-to-zero-vector">FCMLE</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezd_s64" type="checkbox"><label for="vclezd_s64"><div>uint64_t <b><b>vclezd_s64</b></b> (int64_t a)<span class="right">Compare signed less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmle-zero-compare-signed-less-than-or-equal-to-zero-vector">CMLE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezs_f32" type="checkbox"><label for="vclezs_f32"><div>uint32_t <b><b>vclezs_f32</b></b> (float32_t a)<span class="right">Floating-point compare less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmle-zero-floating-point-compare-less-than-or-equal-to-zero-vector">FCMLE</a> Sd,Sn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclezd_f64" type="checkbox"><label for="vclezd_f64"><div>uint64_t <b><b>vclezd_f64</b></b> (float64_t a)<span class="right">Floating-point compare less than or equal to zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmle-zero-floating-point-compare-less-than-or-equal-to-zero-vector">FCMLE</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_s8" type="checkbox"><label for="vcgt_s8"><div>uint8x8_t <b><b>vcgt_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_s8" type="checkbox"><label for="vcgtq_s8"><div>uint8x16_t <b><b>vcgtq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_s16" type="checkbox"><label for="vcgt_s16"><div>uint16x4_t <b><b>vcgt_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_s16" type="checkbox"><label for="vcgtq_s16"><div>uint16x8_t <b><b>vcgtq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_s32" type="checkbox"><label for="vcgt_s32"><div>uint32x2_t <b><b>vcgt_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_s32" type="checkbox"><label for="vcgtq_s32"><div>uint32x4_t <b><b>vcgtq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_u8" type="checkbox"><label for="vcgt_u8"><div>uint8x8_t <b><b>vcgt_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_u8" type="checkbox"><label for="vcgtq_u8"><div>uint8x16_t <b><b>vcgtq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_u16" type="checkbox"><label for="vcgt_u16"><div>uint16x4_t <b><b>vcgt_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_u16" type="checkbox"><label for="vcgtq_u16"><div>uint16x8_t <b><b>vcgtq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_u32" type="checkbox"><label for="vcgt_u32"><div>uint32x2_t <b><b>vcgt_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_u32" type="checkbox"><label for="vcgtq_u32"><div>uint32x4_t <b><b>vcgtq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_f32" type="checkbox"><label for="vcgt_f32"><div>uint32x2_t <b><b>vcgt_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_f32" type="checkbox"><label for="vcgtq_f32"><div>uint32x4_t <b><b>vcgtq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_s64" type="checkbox"><label for="vcgt_s64"><div>uint64x1_t <b><b>vcgt_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_s64" type="checkbox"><label for="vcgtq_s64"><div>uint64x2_t <b><b>vcgtq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_u64" type="checkbox"><label for="vcgt_u64"><div>uint64x1_t <b><b>vcgt_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_u64" type="checkbox"><label for="vcgtq_u64"><div>uint64x2_t <b><b>vcgtq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgt_f64" type="checkbox"><label for="vcgt_f64"><div>uint64x1_t <b><b>vcgt_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtq_f64" type="checkbox"><label for="vcgtq_f64"><div>uint64x2_t <b><b>vcgtq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtd_s64" type="checkbox"><label for="vcgtd_s64"><div>uint64_t <b><b>vcgtd_s64</b></b> (int64_t a, int64_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtd_u64" type="checkbox"><label for="vcgtd_u64"><div>uint64_t <b><b>vcgtd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgts_f32" type="checkbox"><label for="vcgts_f32"><div>uint32_t <b><b>vcgts_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtd_f64" type="checkbox"><label for="vcgtd_f64"><div>uint64_t <b><b>vcgtd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtz_s8" type="checkbox"><label for="vcgtz_s8"><div>uint8x8_t <b><b>vcgtz_s8</b></b> (int8x8_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzq_s8" type="checkbox"><label for="vcgtzq_s8"><div>uint8x16_t <b><b>vcgtzq_s8</b></b> (int8x16_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtz_s16" type="checkbox"><label for="vcgtz_s16"><div>uint16x4_t <b><b>vcgtz_s16</b></b> (int16x4_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.4H,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzq_s16" type="checkbox"><label for="vcgtzq_s16"><div>uint16x8_t <b><b>vcgtzq_s16</b></b> (int16x8_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.8H,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtz_s32" type="checkbox"><label for="vcgtz_s32"><div>uint32x2_t <b><b>vcgtz_s32</b></b> (int32x2_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzq_s32" type="checkbox"><label for="vcgtzq_s32"><div>uint32x4_t <b><b>vcgtzq_s32</b></b> (int32x4_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtz_s64" type="checkbox"><label for="vcgtz_s64"><div>uint64x1_t <b><b>vcgtz_s64</b></b> (int64x1_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzq_s64" type="checkbox"><label for="vcgtzq_s64"><div>uint64x2_t <b><b>vcgtzq_s64</b></b> (int64x2_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtz_f32" type="checkbox"><label for="vcgtz_f32"><div>uint32x2_t <b><b>vcgtz_f32</b></b> (float32x2_t a)<span class="right">Floating-point compare greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzq_f32" type="checkbox"><label for="vcgtzq_f32"><div>uint32x4_t <b><b>vcgtzq_f32</b></b> (float32x4_t a)<span class="right">Floating-point compare greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtz_f64" type="checkbox"><label for="vcgtz_f64"><div>uint64x1_t <b><b>vcgtz_f64</b></b> (float64x1_t a)<span class="right">Floating-point compare greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzq_f64" type="checkbox"><label for="vcgtzq_f64"><div>uint64x2_t <b><b>vcgtzq_f64</b></b> (float64x2_t a)<span class="right">Floating-point compare greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzd_s64" type="checkbox"><label for="vcgtzd_s64"><div>uint64_t <b><b>vcgtzd_s64</b></b> (int64_t a)<span class="right">Compare signed greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzs_f32" type="checkbox"><label for="vcgtzs_f32"><div>uint32_t <b><b>vcgtzs_f32</b></b> (float32_t a)<span class="right">Floating-point compare greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Sd,Sn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcgtzd_f64" type="checkbox"><label for="vcgtzd_f64"><div>uint64_t <b><b>vcgtzd_f64</b></b> (float64_t a)<span class="right">Floating-point compare greater than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclt_s8" type="checkbox"><label for="vclt_s8"><div>uint8x8_t <b><b>vclt_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.8B,Vm.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_s8" type="checkbox"><label for="vcltq_s8"><div>uint8x16_t <b><b>vcltq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-zero-compare-signed-greater-than-zero-vector">CMGT</a> Vd.16B,Vm.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_s16" type="checkbox"><label for="vclt_s16"><div>uint16x4_t <b><b>vclt_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Vd.4H,Vm.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_s16" type="checkbox"><label for="vcltq_s16"><div>uint16x8_t <b><b>vcltq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Vd.8H,Vm.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_s32" type="checkbox"><label for="vclt_s32"><div>uint32x2_t <b><b>vclt_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_s32" type="checkbox"><label for="vcltq_s32"><div>uint32x4_t <b><b>vcltq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_u8" type="checkbox"><label for="vclt_u8"><div>uint8x8_t <b><b>vclt_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.8B,Vm.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_u8" type="checkbox"><label for="vcltq_u8"><div>uint8x16_t <b><b>vcltq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.16B,Vm.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_u16" type="checkbox"><label for="vclt_u16"><div>uint16x4_t <b><b>vclt_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.4H,Vm.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_u16" type="checkbox"><label for="vcltq_u16"><div>uint16x8_t <b><b>vcltq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.8H,Vm.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_u32" type="checkbox"><label for="vclt_u32"><div>uint32x2_t <b><b>vclt_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_u32" type="checkbox"><label for="vcltq_u32"><div>uint32x4_t <b><b>vcltq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_f32" type="checkbox"><label for="vclt_f32"><div>uint32x2_t <b><b>vclt_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_f32" type="checkbox"><label for="vcltq_f32"><div>uint32x4_t <b><b>vcltq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-zero-floating-point-compare-greater-than-zero-vector">FCMGT</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclt_s64" type="checkbox"><label for="vclt_s64"><div>uint64x1_t <b><b>vclt_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_s64" type="checkbox"><label for="vcltq_s64"><div>uint64x2_t <b><b>vcltq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclt_u64" type="checkbox"><label for="vclt_u64"><div>uint64x1_t <b><b>vclt_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_u64" type="checkbox"><label for="vcltq_u64"><div>uint64x2_t <b><b>vcltq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclt_f64" type="checkbox"><label for="vclt_f64"><div>uint64x1_t <b><b>vclt_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-register-floating-point-compare-greater-than-vector">FCMGT</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltq_f64" type="checkbox"><label for="vcltq_f64"><div>uint64x2_t <b><b>vcltq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-register-floating-point-compare-greater-than-vector">FCMGT</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltd_s64" type="checkbox"><label for="vcltd_s64"><div>uint64_t <b><b>vcltd_s64</b></b> (int64_t a, int64_t b)<span class="right">Compare signed greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmgt-register-compare-signed-greater-than-vector">CMGT</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltd_u64" type="checkbox"><label for="vcltd_u64"><div>uint64_t <b><b>vcltd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Compare unsigned higher</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&amp;FP register with the corresponding vector element in the second source SIMD&amp;FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhi-register-compare-unsigned-higher-vector">CMHI</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 &gt;= element2 else element1 &gt; element2;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vclts_f32" type="checkbox"><label for="vclts_f32"><div>uint32_t <b><b>vclts_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-register-floating-point-compare-greater-than-vector">FCMGT</a> Sd,Sm,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltd_f64" type="checkbox"><label for="vcltd_f64"><div>uint64_t <b><b>vcltd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&amp;FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&amp;FP register sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmgt-register-floating-point-compare-greater-than-vector">FCMGT</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltz_s8" type="checkbox"><label for="vcltz_s8"><div>uint8x8_t <b><b>vcltz_s8</b></b> (int8x8_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.8B,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzq_s8" type="checkbox"><label for="vcltzq_s8"><div>uint8x16_t <b><b>vcltzq_s8</b></b> (int8x16_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.16B,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltz_s16" type="checkbox"><label for="vcltz_s16"><div>uint16x4_t <b><b>vcltz_s16</b></b> (int16x4_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.4H,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzq_s16" type="checkbox"><label for="vcltzq_s16"><div>uint16x8_t <b><b>vcltzq_s16</b></b> (int16x8_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.8H,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltz_s32" type="checkbox"><label for="vcltz_s32"><div>uint32x2_t <b><b>vcltz_s32</b></b> (int32x2_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzq_s32" type="checkbox"><label for="vcltzq_s32"><div>uint32x4_t <b><b>vcltzq_s32</b></b> (int32x4_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltz_s64" type="checkbox"><label for="vcltz_s64"><div>uint64x1_t <b><b>vcltz_s64</b></b> (int64x1_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzq_s64" type="checkbox"><label for="vcltzq_s64"><div>uint64x2_t <b><b>vcltzq_s64</b></b> (int64x2_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltz_f32" type="checkbox"><label for="vcltz_f32"><div>uint32x2_t <b><b>vcltz_f32</b></b> (float32x2_t a)<span class="right">Floating-point compare less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmlt-zero-floating-point-compare-less-than-zero-vector">FCMLT</a> Vd.2S,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzq_f32" type="checkbox"><label for="vcltzq_f32"><div>uint32x4_t <b><b>vcltzq_f32</b></b> (float32x4_t a)<span class="right">Floating-point compare less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmlt-zero-floating-point-compare-less-than-zero-vector">FCMLT</a> Vd.4S,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltz_f64" type="checkbox"><label for="vcltz_f64"><div>uint64x1_t <b><b>vcltz_f64</b></b> (float64x1_t a)<span class="right">Floating-point compare less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmlt-zero-floating-point-compare-less-than-zero-vector">FCMLT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzq_f64" type="checkbox"><label for="vcltzq_f64"><div>uint64x2_t <b><b>vcltzq_f64</b></b> (float64x2_t a)<span class="right">Floating-point compare less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmlt-zero-floating-point-compare-less-than-zero-vector">FCMLT</a> Vd.2D,Vn.2D,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzd_s64" type="checkbox"><label for="vcltzd_s64"><div>uint64_t <b><b>vcltzd_s64</b></b> (int64_t a)<span class="right">Compare signed less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&amp;FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmlt-zero-compare-signed-less-than-zero-vector">CMLT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = element &gt; 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = element &gt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = element == 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = element &lt;= 0;
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = element &lt; 0;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzs_f32" type="checkbox"><label for="vcltzs_f32"><div>uint32_t <b><b>vcltzs_f32</b></b> (float32_t a)<span class="right">Floating-point compare less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmlt-zero-floating-point-compare-less-than-zero-vector">FCMLT</a> Sd,Sn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcltzd_f64" type="checkbox"><label for="vcltzd_f64"><div>uint64_t <b><b>vcltzd_f64</b></b> (float64_t a)<span class="right">Floating-point compare less than zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&amp;FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcmlt-zero-floating-point-compare-less-than-zero-vector">FCMLT</a> Dd,Dn,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) zero = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPZero.1" title="function: bits(N) FPZero(bit sign)">FPZero</a>('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    case comparison of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element, zero, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(zero, element, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_LT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_LT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(zero, element, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcage_f32" type="checkbox"><label for="vcage_f32"><div>uint32x2_t <b><b>vcage_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcageq_f32" type="checkbox"><label for="vcageq_f32"><div>uint32x4_t <b><b>vcageq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcage_f64" type="checkbox"><label for="vcage_f64"><div>uint64x1_t <b><b>vcage_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcageq_f64" type="checkbox"><label for="vcageq_f64"><div>uint64x2_t <b><b>vcageq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcages_f32" type="checkbox"><label for="vcages_f32"><div>uint32_t <b><b>vcages_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcaged_f64" type="checkbox"><label for="vcaged_f64"><div>uint64_t <b><b>vcaged_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcale_f32" type="checkbox"><label for="vcale_f32"><div>uint32x2_t <b><b>vcale_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point absolute compare less than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero. Note that this intrinsic passes its operands to the instruction in swapped order (b is the first source, a is the second), so each result element is set to all ones when the absolute value of a is less than or equal to the absolute value of b.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcaleq_f32" type="checkbox"><label for="vcaleq_f32"><div>uint32x4_t <b><b>vcaleq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point absolute compare less than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero. Note that this intrinsic passes its operands to the instruction in swapped order (b is the first source, a is the second), so each result element is set to all ones when the absolute value of a is less than or equal to the absolute value of b.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcale_f64" type="checkbox"><label for="vcale_f64"><div>uint64x1_t <b><b>vcale_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcaleq_f64" type="checkbox"><label for="vcaleq_f64"><div>uint64x2_t <b><b>vcaleq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcales_f32" type="checkbox"><label for="vcales_f32"><div>uint32_t <b><b>vcales_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Sd,Sm,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcaled_f64" type="checkbox"><label for="vcaled_f64"><div>uint64_t <b><b>vcaled_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point absolute compare greater than or equal</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&amp;FP register with the absolute value of the corresponding floating-point value in the second source SIMD&amp;FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facge-floating-point-absolute-compare-greater-than-or-equal-vector">FACGE</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcagt_f32" type="checkbox"><label for="vcagt_f32"><div>uint32x2_t <b><b>vcagt_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcagtq_f32" type="checkbox"><label for="vcagtq_f32"><div>uint32x4_t <b><b>vcagtq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcagt_f64" type="checkbox"><label for="vcagt_f64"><div>uint64x1_t <b><b>vcagt_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcagtq_f64" type="checkbox"><label for="vcagtq_f64"><div>uint64x2_t <b><b>vcagtq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcagts_f32" type="checkbox"><label for="vcagts_f32"><div>uint32_t <b><b>vcagts_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcagtd_f64" type="checkbox"><label for="vcagtd_f64"><div>uint64_t <b><b>vcagtd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcalt_f32" type="checkbox"><label for="vcalt_f32"><div>uint32x2_t <b><b>vcalt_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Vd.2S,Vm.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcaltq_f32" type="checkbox"><label for="vcaltq_f32"><div>uint32x4_t <b><b>vcaltq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Vd.4S,Vm.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcalt_f64" type="checkbox"><label for="vcalt_f64"><div>uint64x1_t <b><b>vcalt_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcaltq_f64" type="checkbox"><label for="vcaltq_f64"><div>uint64x2_t <b><b>vcaltq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Vd.2D,Vm.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcalts_f32" type="checkbox"><label for="vcalts_f32"><div>uint32_t <b><b>vcalts_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Sd,Sm,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcaltd_f64" type="checkbox"><label for="vcaltd_f64"><div>uint64_t <b><b>vcaltd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point absolute compare greater than</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&amp;FP register with the absolute value of the corresponding vector element in the second source SIMD&amp;FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/facgt-floating-point-absolute-compare-greater-than-vector">FACGT</a> Dd,Dm,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if abs then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element1);
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element2);
+    case cmp of
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_EQ" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_EQ</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareEQ.3" title="function: boolean FPCompareEQ(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareEQ</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GE" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GE</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGE.3" title="function: boolean FPCompareGE(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGE</a>(element1, element2, FPCR);
+        when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CompareOp_GT" title="enumeration CompareOp   {CompareOp_GT, CompareOp_GE, CompareOp_EQ,
+ CompareOp_LE, CompareOp_LT}">CompareOp_GT</a> test_passed = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPCompareGT.3" title="function: boolean FPCompareGT(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPCompareGT</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtst_s8" type="checkbox"><label for="vtst_s8"><div>uint8x8_t <b><b>vtst_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_s8" type="checkbox"><label for="vtstq_s8"><div>uint8x16_t <b><b>vtstq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_s16" type="checkbox"><label for="vtst_s16"><div>uint16x4_t <b><b>vtst_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_s16" type="checkbox"><label for="vtstq_s16"><div>uint16x8_t <b><b>vtstq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_s32" type="checkbox"><label for="vtst_s32"><div>uint32x2_t <b><b>vtst_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_s32" type="checkbox"><label for="vtstq_s32"><div>uint32x4_t <b><b>vtstq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_u8" type="checkbox"><label for="vtst_u8"><div>uint8x8_t <b><b>vtst_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_u8" type="checkbox"><label for="vtstq_u8"><div>uint8x16_t <b><b>vtstq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_u16" type="checkbox"><label for="vtst_u16"><div>uint16x4_t <b><b>vtst_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_u16" type="checkbox"><label for="vtstq_u16"><div>uint16x8_t <b><b>vtstq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_u32" type="checkbox"><label for="vtst_u32"><div>uint32x2_t <b><b>vtst_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_u32" type="checkbox"><label for="vtstq_u32"><div>uint32x4_t <b><b>vtstq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_p8" type="checkbox"><label for="vtst_p8"><div>uint8x8_t <b><b>vtst_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_p8" type="checkbox"><label for="vtstq_p8"><div>uint8x16_t <b><b>vtstq_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtst_s64" type="checkbox"><label for="vtst_s64"><div>uint64x1_t <b><b>vtst_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_s64" type="checkbox"><label for="vtstq_s64"><div>uint64x2_t <b><b>vtstq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtst_u64" type="checkbox"><label for="vtst_u64"><div>uint64x1_t <b><b>vtst_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_u64" type="checkbox"><label for="vtstq_u64"><div>uint64x2_t <b><b>vtstq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtst_p64" type="checkbox"><label for="vtst_p64"><div>uint64x1_t <b><b>vtst_p64</b></b> (poly64x1_t a, poly64x1_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstq_p64" type="checkbox"><label for="vtstq_p64"><div>uint64x2_t <b><b>vtstq_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtstd_s64" type="checkbox"><label for="vtstd_s64"><div>uint64_t <b><b>vtstd_s64</b></b> (int64_t a, int64_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtstd_u64" type="checkbox"><label for="vtstd_u64"><div>uint64_t <b><b>vtstd_u64</b></b> (uint64_t a, uint64_t b)<span class="right">Compare bitwise test bits nonzero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&amp;FP register, performs an AND with the corresponding vector element in the second source SIMD&amp;FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&amp;FP register to zero.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmtst-compare-bitwise-test-bits-nonzero-vector">CMTST</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if and_test then
+        test_passed = !<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.IsZero.1" title="function: boolean IsZero(bits(N) x)">IsZero</a>(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if test_passed then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabd_s8" type="checkbox"><label for="vabd_s8"><div>int8x8_t <b><b>vabd_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabd-signed-absolute-difference">SABD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_s8" type="checkbox"><label for="vabdq_s8"><div>int8x16_t <b><b>vabdq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabd-signed-absolute-difference">SABD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_s16" type="checkbox"><label for="vabd_s16"><div>int16x4_t <b><b>vabd_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabd-signed-absolute-difference">SABD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_s16" type="checkbox"><label for="vabdq_s16"><div>int16x8_t <b><b>vabdq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabd-signed-absolute-difference">SABD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_s32" type="checkbox"><label for="vabd_s32"><div>int32x2_t <b><b>vabd_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabd-signed-absolute-difference">SABD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_s32" type="checkbox"><label for="vabdq_s32"><div>int32x4_t <b><b>vabdq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabd-signed-absolute-difference">SABD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_u8" type="checkbox"><label for="vabd_u8"><div>uint8x8_t <b><b>vabd_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabd-unsigned-absolute-difference-vector">UABD</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_u8" type="checkbox"><label for="vabdq_u8"><div>uint8x16_t <b><b>vabdq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabd-unsigned-absolute-difference-vector">UABD</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_u16" type="checkbox"><label for="vabd_u16"><div>uint16x4_t <b><b>vabd_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabd-unsigned-absolute-difference-vector">UABD</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_u16" type="checkbox"><label for="vabdq_u16"><div>uint16x8_t <b><b>vabdq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabd-unsigned-absolute-difference-vector">UABD</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_u32" type="checkbox"><label for="vabd_u32"><div>uint32x2_t <b><b>vabd_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabd-unsigned-absolute-difference-vector">UABD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_u32" type="checkbox"><label for="vabdq_u32"><div>uint32x4_t <b><b>vabdq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabd-unsigned-absolute-difference-vector">UABD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_f32" type="checkbox"><label for="vabd_f32"><div>float32x2_t <b><b>vabd_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&amp;FP register, from the corresponding floating-point values in the elements of the first source SIMD&amp;FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabd-floating-point-absolute-difference-vector">FABD</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_f32" type="checkbox"><label for="vabdq_f32"><div>float32x4_t <b><b>vabdq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&amp;FP register, from the corresponding floating-point values in the elements of the first source SIMD&amp;FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabd-floating-point-absolute-difference-vector">FABD</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabd_f64" type="checkbox"><label for="vabd_f64"><div>float64x1_t <b><b>vabd_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&amp;FP register, from the corresponding floating-point values in the elements of the first source SIMD&amp;FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabd-floating-point-absolute-difference-vector">FABD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdq_f64" type="checkbox"><label for="vabdq_f64"><div>float64x2_t <b><b>vabdq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&amp;FP register, from the corresponding floating-point values in the elements of the first source SIMD&amp;FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabd-floating-point-absolute-difference-vector">FABD</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabds_f32" type="checkbox"><label for="vabds_f32"><div>float32_t <b><b>vabds_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&amp;FP register, from the corresponding floating-point values in the elements of the first source SIMD&amp;FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabd-floating-point-absolute-difference-vector">FABD</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdd_f64" type="checkbox"><label for="vabdd_f64"><div>float64_t <b><b>vabdd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point absolute difference</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&amp;FP register, from the corresponding floating-point values in the elements of the first source SIMD&amp;FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabd-floating-point-absolute-difference-vector">FABD</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    diff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSub.3" title="function: bits(N) FPSub(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPSub</a>(element1, element2, FPCR);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = if abs then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(diff) else diff;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_s8" type="checkbox"><label for="vabdl_s8"><div>int16x8_t <b><b>vabdl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference Long. This instruction subtracts the vector elements of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the results into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabdl-sabdl2-signed-absolute-difference-long">SABDL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_s16" type="checkbox"><label for="vabdl_s16"><div>int32x4_t <b><b>vabdl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabdl-sabdl2-signed-absolute-difference-long">SABDL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_s32" type="checkbox"><label for="vabdl_s32"><div>int64x2_t <b><b>vabdl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabdl-sabdl2-signed-absolute-difference-long">SABDL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_u8" type="checkbox"><label for="vabdl_u8"><div>uint16x8_t <b><b>vabdl_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabdl-uabdl2-unsigned-absolute-difference-long">UABDL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_u16" type="checkbox"><label for="vabdl_u16"><div>uint32x4_t <b><b>vabdl_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabdl-uabdl2-unsigned-absolute-difference-long">UABDL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_u32" type="checkbox"><label for="vabdl_u32"><div>uint64x2_t <b><b>vabdl_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabdl-uabdl2-unsigned-absolute-difference-long">UABDL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_high_s8" type="checkbox"><label for="vabdl_high_s8"><div>int16x8_t <b><b>vabdl_high_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabdl-sabdl2-signed-absolute-difference-long">SABDL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_high_s16" type="checkbox"><label for="vabdl_high_s16"><div>int32x4_t <b><b>vabdl_high_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabdl-sabdl2-signed-absolute-difference-long">SABDL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_high_s32" type="checkbox"><label for="vabdl_high_s32"><div>int64x2_t <b><b>vabdl_high_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabdl-sabdl2-signed-absolute-difference-long">SABDL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_high_u8" type="checkbox"><label for="vabdl_high_u8"><div>uint16x8_t <b><b>vabdl_high_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabdl-uabdl2-unsigned-absolute-difference-long">UABDL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_high_u16" type="checkbox"><label for="vabdl_high_u16"><div>uint32x4_t <b><b>vabdl_high_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabdl-uabdl2-unsigned-absolute-difference-long">UABDL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabdl_high_u32" type="checkbox"><label for="vabdl_high_u32"><div>uint64x2_t <b><b>vabdl_high_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned absolute difference long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabdl-uabdl2-unsigned-absolute-difference-long">UABDL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaba_s8" type="checkbox"><label for="vaba_s8"><div>int8x8_t <b><b>vaba_s8</b></b> (int8x8_t a, int8x8_t b, int8x8_t c)<span class="right">Signed absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saba-signed-absolute-difference-and-accumulate">SABA</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabaq_s8" type="checkbox"><label for="vabaq_s8"><div>int8x16_t <b><b>vabaq_s8</b></b> (int8x16_t a, int8x16_t b, int8x16_t c)<span class="right">Signed absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saba-signed-absolute-difference-and-accumulate">SABA</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaba_s16" type="checkbox"><label for="vaba_s16"><div>int16x4_t <b><b>vaba_s16</b></b> (int16x4_t a, int16x4_t b, int16x4_t c)<span class="right">Signed absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saba-signed-absolute-difference-and-accumulate">SABA</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabaq_s16" type="checkbox"><label for="vabaq_s16"><div>int16x8_t <b><b>vabaq_s16</b></b> (int16x8_t a, int16x8_t b, int16x8_t c)<span class="right">Signed absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saba-signed-absolute-difference-and-accumulate">SABA</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaba_s32" type="checkbox"><label for="vaba_s32"><div>int32x2_t <b><b>vaba_s32</b></b> (int32x2_t a, int32x2_t b, int32x2_t c)<span class="right">Signed absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saba-signed-absolute-difference-and-accumulate">SABA</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabaq_s32" type="checkbox"><label for="vabaq_s32"><div>int32x4_t <b><b>vabaq_s32</b></b> (int32x4_t a, int32x4_t b, int32x4_t c)<span class="right">Signed absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saba-signed-absolute-difference-and-accumulate">SABA</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaba_u8" type="checkbox"><label for="vaba_u8"><div>uint8x8_t <b><b>vaba_u8</b></b> (uint8x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Unsigned absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaba-unsigned-absolute-difference-and-accumulate">UABA</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabaq_u8" type="checkbox"><label for="vabaq_u8"><div>uint8x16_t <b><b>vabaq_u8</b></b> (uint8x16_t a, uint8x16_t b, uint8x16_t c)<span class="right">Unsigned absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaba-unsigned-absolute-difference-and-accumulate">UABA</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaba_u16" type="checkbox"><label for="vaba_u16"><div>uint16x4_t <b><b>vaba_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Unsigned absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaba-unsigned-absolute-difference-and-accumulate">UABA</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabaq_u16" type="checkbox"><label for="vabaq_u16"><div>uint16x8_t <b><b>vabaq_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x8_t c)<span class="right">Unsigned absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaba-unsigned-absolute-difference-and-accumulate">UABA</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaba_u32" type="checkbox"><label for="vaba_u32"><div>uint32x2_t <b><b>vaba_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Unsigned absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaba-unsigned-absolute-difference-and-accumulate">UABA</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabaq_u32" type="checkbox"><label for="vabaq_u32"><div>uint32x4_t <b><b>vabaq_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x4_t c)<span class="right">Unsigned absolute difference and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&amp;FP register from the corresponding elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaba-unsigned-absolute-difference-and-accumulate">UABA</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_s8" type="checkbox"><label for="vabal_s8"><div>int16x8_t <b><b>vabal_s8</b></b> (int16x8_t a, int8x8_t b, int8x8_t c)<span class="right">Signed absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabal-sabal2-signed-absolute-difference-and-accumulate-long">SABAL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_s16" type="checkbox"><label for="vabal_s16"><div>int32x4_t <b><b>vabal_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t c)<span class="right">Signed absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabal-sabal2-signed-absolute-difference-and-accumulate-long">SABAL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_s32" type="checkbox"><label for="vabal_s32"><div>int64x2_t <b><b>vabal_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t c)<span class="right">Signed absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabal-sabal2-signed-absolute-difference-and-accumulate-long">SABAL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_u8" type="checkbox"><label for="vabal_u8"><div>uint16x8_t <b><b>vabal_u8</b></b> (uint16x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Unsigned absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabal-uabal2-unsigned-absolute-difference-and-accumulate-long">UABAL</a> Vd.8H,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_u16" type="checkbox"><label for="vabal_u16"><div>uint32x4_t <b><b>vabal_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Unsigned absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabal-uabal2-unsigned-absolute-difference-and-accumulate-long">UABAL</a> Vd.4S,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_u32" type="checkbox"><label for="vabal_u32"><div>uint64x2_t <b><b>vabal_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Unsigned absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabal-uabal2-unsigned-absolute-difference-and-accumulate-long">UABAL</a> Vd.2D,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabal_high_s8" type="checkbox"><label for="vabal_high_s8"><div>int16x8_t <b><b>vabal_high_s8</b></b> (int16x8_t a, int8x16_t b, int8x16_t c)<span class="right">Signed absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabal-sabal2-signed-absolute-difference-and-accumulate-long">SABAL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabal_high_s16" type="checkbox"><label for="vabal_high_s16"><div>int32x4_t <b><b>vabal_high_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t c)<span class="right">Signed absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabal-sabal2-signed-absolute-difference-and-accumulate-long">SABAL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabal_high_s32" type="checkbox"><label for="vabal_high_s32"><div>int64x2_t <b><b>vabal_high_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t c)<span class="right">Signed absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sabal-sabal2-signed-absolute-difference-and-accumulate-long">SABAL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabal_high_u8" type="checkbox"><label for="vabal_high_u8"><div>uint16x8_t <b><b>vabal_high_u8</b></b> (uint16x8_t a, uint8x16_t b, uint8x16_t c)<span class="right">Unsigned absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabal-uabal2-unsigned-absolute-difference-and-accumulate-long">UABAL2</a> Vd.8H,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabal_high_u16" type="checkbox"><label for="vabal_high_u16"><div>uint32x4_t <b><b>vabal_high_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x8_t c)<span class="right">Unsigned absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabal-uabal2-unsigned-absolute-difference-and-accumulate-long">UABAL2</a> Vd.4S,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabal_high_u32" type="checkbox"><label for="vabal_high_u32"><div>uint64x2_t <b><b>vabal_high_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x4_t c)<span class="right">Unsigned absolute difference and accumulate long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&amp;FP register from the corresponding vector elements of the first source SIMD&amp;FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uabal-uabal2-unsigned-absolute-difference-and-accumulate-long">UABAL2</a> Vd.2D,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    absdiff = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element1-element2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + absdiff;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmax_s8" type="checkbox"><label for="vmax_s8"><div>int8x8_t <b><b>vmax_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smax-signed-maximum-vector">SMAX</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_s8" type="checkbox"><label for="vmaxq_s8"><div>int8x16_t <b><b>vmaxq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smax-signed-maximum-vector">SMAX</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_s16" type="checkbox"><label for="vmax_s16"><div>int16x4_t <b><b>vmax_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smax-signed-maximum-vector">SMAX</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_s16" type="checkbox"><label for="vmaxq_s16"><div>int16x8_t <b><b>vmaxq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smax-signed-maximum-vector">SMAX</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_s32" type="checkbox"><label for="vmax_s32"><div>int32x2_t <b><b>vmax_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smax-signed-maximum-vector">SMAX</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_s32" type="checkbox"><label for="vmaxq_s32"><div>int32x4_t <b><b>vmaxq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smax-signed-maximum-vector">SMAX</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_u8" type="checkbox"><label for="vmax_u8"><div>uint8x8_t <b><b>vmax_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umax-unsigned-maximum-vector">UMAX</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_u8" type="checkbox"><label for="vmaxq_u8"><div>uint8x16_t <b><b>vmaxq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umax-unsigned-maximum-vector">UMAX</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_u16" type="checkbox"><label for="vmax_u16"><div>uint16x4_t <b><b>vmax_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umax-unsigned-maximum-vector">UMAX</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_u16" type="checkbox"><label for="vmaxq_u16"><div>uint16x8_t <b><b>vmaxq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umax-unsigned-maximum-vector">UMAX</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_u32" type="checkbox"><label for="vmax_u32"><div>uint32x2_t <b><b>vmax_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned maximum</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umax-unsigned-maximum-vector">UMAX</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_u32" type="checkbox"><label for="vmaxq_u32"><div>uint32x4_t <b><b>vmaxq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umax-unsigned-maximum-vector">UMAX</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_f32" type="checkbox"><label for="vmax_f32"><div>float32x2_t <b><b>vmax_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmax-vector-floating-point-maximum-vector">FMAX</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_f32" type="checkbox"><label for="vmaxq_f32"><div>float32x4_t <b><b>vmaxq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmax-vector-floating-point-maximum-vector">FMAX</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmax_f64" type="checkbox"><label for="vmax_f64"><div>float64x1_t <b><b>vmax_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmax-vector-floating-point-maximum-vector">FMAX</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxq_f64" type="checkbox"><label for="vmaxq_f64"><div>float64x2_t <b><b>vmaxq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point maximum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmax-vector-floating-point-maximum-vector">FMAX</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmin_s8" type="checkbox"><label for="vmin_s8"><div>int8x8_t <b><b>vmin_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smin-signed-minimum-vector">SMIN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_s8" type="checkbox"><label for="vminq_s8"><div>int8x16_t <b><b>vminq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smin-signed-minimum-vector">SMIN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_s16" type="checkbox"><label for="vmin_s16"><div>int16x4_t <b><b>vmin_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smin-signed-minimum-vector">SMIN</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_s16" type="checkbox"><label for="vminq_s16"><div>int16x8_t <b><b>vminq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smin-signed-minimum-vector">SMIN</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_s32" type="checkbox"><label for="vmin_s32"><div>int32x2_t <b><b>vmin_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smin-signed-minimum-vector">SMIN</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_s32" type="checkbox"><label for="vminq_s32"><div>int32x4_t <b><b>vminq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smin-signed-minimum-vector">SMIN</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_u8" type="checkbox"><label for="vmin_u8"><div>uint8x8_t <b><b>vmin_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umin-unsigned-minimum-vector">UMIN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_u8" type="checkbox"><label for="vminq_u8"><div>uint8x16_t <b><b>vminq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umin-unsigned-minimum-vector">UMIN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_u16" type="checkbox"><label for="vmin_u16"><div>uint16x4_t <b><b>vmin_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umin-unsigned-minimum-vector">UMIN</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_u16" type="checkbox"><label for="vminq_u16"><div>uint16x8_t <b><b>vminq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umin-unsigned-minimum-vector">UMIN</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_u32" type="checkbox"><label for="vmin_u32"><div>uint32x2_t <b><b>vmin_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umin-unsigned-minimum-vector">UMIN</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_u32" type="checkbox"><label for="vminq_u32"><div>uint32x4_t <b><b>vminq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umin-unsigned-minimum-vector">UMIN</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_f32" type="checkbox"><label for="vmin_f32"><div>float32x2_t <b><b>vmin_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmin-vector-floating-point-minimum-vector">FMIN</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminq_f32" type="checkbox"><label for="vminq_f32"><div>float32x4_t <b><b>vminq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmin-vector-floating-point-minimum-vector">FMIN</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmin_f64" type="checkbox"><label for="vmin_f64"><div>float64x1_t <b><b>vmin_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmin-vector-floating-point-minimum-vector">FMIN</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminq_f64" type="checkbox"><label for="vminq_f64"><div>float64x2_t <b><b>vminq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point minimum</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&amp;FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmin-vector-floating-point-minimum-vector">FMIN</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnm_f32" type="checkbox"><label for="vmaxnm_f32"><div>float32x2_t <b><b>vmaxnm_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point maximum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnm-vector-floating-point-maximum-number-vector">FMAXNM</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnmq_f32" type="checkbox"><label for="vmaxnmq_f32"><div>float32x4_t <b><b>vmaxnmq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point maximum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnm-vector-floating-point-maximum-number-vector">FMAXNM</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnm_f64" type="checkbox"><label for="vmaxnm_f64"><div>float64x1_t <b><b>vmaxnm_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point maximum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnm-vector-floating-point-maximum-number-vector">FMAXNM</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnmq_f64" type="checkbox"><label for="vmaxnmq_f64"><div>float64x2_t <b><b>vmaxnmq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point maximum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnm-vector-floating-point-maximum-number-vector">FMAXNM</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminnm_f32" type="checkbox"><label for="vminnm_f32"><div>float32x2_t <b><b>vminnm_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point minimum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnm-vector-floating-point-minimum-number-vector">FMINNM</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminnmq_f32" type="checkbox"><label for="vminnmq_f32"><div>float32x4_t <b><b>vminnmq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point minimum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnm-vector-floating-point-minimum-number-vector">FMINNM</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vminnm_f64" type="checkbox"><label for="vminnm_f64"><div>float64x1_t <b><b>vminnm_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point minimum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnm-vector-floating-point-minimum-number-vector">FMINNM</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminnmq_f64" type="checkbox"><label for="vminnmq_f64"><div>float64x2_t <b><b>vminnmq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point minimum number</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&amp;FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnm-vector-floating-point-minimum-number-vector">FMINNM</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshl_s8" type="checkbox"><label for="vshl_s8"><div>int8x8_t <b><b>vshl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_s8" type="checkbox"><label for="vshlq_s8"><div>int8x16_t <b><b>vshlq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_s16" type="checkbox"><label for="vshl_s16"><div>int16x4_t <b><b>vshl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_s16" type="checkbox"><label for="vshlq_s16"><div>int16x8_t <b><b>vshlq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_s32" type="checkbox"><label for="vshl_s32"><div>int32x2_t <b><b>vshl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_s32" type="checkbox"><label for="vshlq_s32"><div>int32x4_t <b><b>vshlq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_s64" type="checkbox"><label for="vshl_s64"><div>int64x1_t <b><b>vshl_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_s64" type="checkbox"><label for="vshlq_s64"><div>int64x2_t <b><b>vshlq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_u8" type="checkbox"><label for="vshl_u8"><div>uint8x8_t <b><b>vshl_u8</b></b> (uint8x8_t a, int8x8_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_u8" type="checkbox"><label for="vshlq_u8"><div>uint8x16_t <b><b>vshlq_u8</b></b> (uint8x16_t a, int8x16_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_u16" type="checkbox"><label for="vshl_u16"><div>uint16x4_t <b><b>vshl_u16</b></b> (uint16x4_t a, int16x4_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_u16" type="checkbox"><label for="vshlq_u16"><div>uint16x8_t <b><b>vshlq_u16</b></b> (uint16x8_t a, int16x8_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_u32" type="checkbox"><label for="vshl_u32"><div>uint32x2_t <b><b>vshl_u32</b></b> (uint32x2_t a, int32x2_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_u32" type="checkbox"><label for="vshlq_u32"><div>uint32x4_t <b><b>vshlq_u32</b></b> (uint32x4_t a, int32x4_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_u64" type="checkbox"><label for="vshl_u64"><div>uint64x1_t <b><b>vshl_u64</b></b> (uint64x1_t a, int64x1_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_u64" type="checkbox"><label for="vshlq_u64"><div>uint64x2_t <b><b>vshlq_u64</b></b> (uint64x2_t a, int64x2_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshld_s64" type="checkbox"><label for="vshld_s64"><div>int64_t <b><b>vshld_s64</b></b> (int64_t a, int64_t b)<span class="right">Signed shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshl-signed-shift-left-register">SSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshld_u64" type="checkbox"><label for="vshld_u64"><div>uint64_t <b><b>vshld_u64</b></b> (uint64_t a, int64_t b)<span class="right">Unsigned shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushl-unsigned-shift-left-register">USHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_s8" type="checkbox"><label for="vqshl_s8"><div>int8x8_t <b><b>vqshl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_s8" type="checkbox"><label for="vqshlq_s8"><div>int8x16_t <b><b>vqshlq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_s16" type="checkbox"><label for="vqshl_s16"><div>int16x4_t <b><b>vqshl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_s16" type="checkbox"><label for="vqshlq_s16"><div>int16x8_t <b><b>vqshlq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_s32" type="checkbox"><label for="vqshl_s32"><div>int32x2_t <b><b>vqshl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_s32" type="checkbox"><label for="vqshlq_s32"><div>int32x4_t <b><b>vqshlq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_s64" type="checkbox"><label for="vqshl_s64"><div>int64x1_t <b><b>vqshl_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_s64" type="checkbox"><label for="vqshlq_s64"><div>int64x2_t <b><b>vqshlq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_u8" type="checkbox"><label for="vqshl_u8"><div>uint8x8_t <b><b>vqshl_u8</b></b> (uint8x8_t a, int8x8_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_u8" type="checkbox"><label for="vqshlq_u8"><div>uint8x16_t <b><b>vqshlq_u8</b></b> (uint8x16_t a, int8x16_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_u16" type="checkbox"><label for="vqshl_u16"><div>uint16x4_t <b><b>vqshl_u16</b></b> (uint16x4_t a, int16x4_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_u16" type="checkbox"><label for="vqshlq_u16"><div>uint16x8_t <b><b>vqshlq_u16</b></b> (uint16x8_t a, int16x8_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_u32" type="checkbox"><label for="vqshl_u32"><div>uint32x2_t <b><b>vqshl_u32</b></b> (uint32x2_t a, int32x2_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_u32" type="checkbox"><label for="vqshlq_u32"><div>uint32x4_t <b><b>vqshlq_u32</b></b> (uint32x4_t a, int32x4_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_u64" type="checkbox"><label for="vqshl_u64"><div>uint64x1_t <b><b>vqshl_u64</b></b> (uint64x1_t a, int64x1_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_u64" type="checkbox"><label for="vqshlq_u64"><div>uint64x2_t <b><b>vqshlq_u64</b></b> (uint64x2_t a, int64x2_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlb_s8" type="checkbox"><label for="vqshlb_s8"><div>int8_t <b><b>vqshlb_s8</b></b> (int8_t a, int8_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlh_s16" type="checkbox"><label for="vqshlh_s16"><div>int16_t <b><b>vqshlh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshls_s32" type="checkbox"><label for="vqshls_s32"><div>int32_t <b><b>vqshls_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshld_s64" type="checkbox"><label for="vqshld_s64"><div>int64_t <b><b>vqshld_s64</b></b> (int64_t a, int64_t b)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlb_u8" type="checkbox"><label for="vqshlb_u8"><div>uint8_t <b><b>vqshlb_u8</b></b> (uint8_t a, int8_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlh_u16" type="checkbox"><label for="vqshlh_u16"><div>uint16_t <b><b>vqshlh_u16</b></b> (uint16_t a, int16_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshls_u32" type="checkbox"><label for="vqshls_u32"><div>uint32_t <b><b>vqshls_u32</b></b> (uint32_t a, int32_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshld_u64" type="checkbox"><label for="vqshld_u64"><div>uint64_t <b><b>vqshld_u64</b></b> (uint64_t a, int64_t b)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_s8" type="checkbox"><label for="vrshl_s8"><div>int8x8_t <b><b>vrshl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_s8" type="checkbox"><label for="vrshlq_s8"><div>int8x16_t <b><b>vrshlq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_s16" type="checkbox"><label for="vrshl_s16"><div>int16x4_t <b><b>vrshl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_s16" type="checkbox"><label for="vrshlq_s16"><div>int16x8_t <b><b>vrshlq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_s32" type="checkbox"><label for="vrshl_s32"><div>int32x2_t <b><b>vrshl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_s32" type="checkbox"><label for="vrshlq_s32"><div>int32x4_t <b><b>vrshlq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_s64" type="checkbox"><label for="vrshl_s64"><div>int64x1_t <b><b>vrshl_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_s64" type="checkbox"><label for="vrshlq_s64"><div>int64x2_t <b><b>vrshlq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_u8" type="checkbox"><label for="vrshl_u8"><div>uint8x8_t <b><b>vrshl_u8</b></b> (uint8x8_t a, int8x8_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_u8" type="checkbox"><label for="vrshlq_u8"><div>uint8x16_t <b><b>vrshlq_u8</b></b> (uint8x16_t a, int8x16_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_u16" type="checkbox"><label for="vrshl_u16"><div>uint16x4_t <b><b>vrshl_u16</b></b> (uint16x4_t a, int16x4_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_u16" type="checkbox"><label for="vrshlq_u16"><div>uint16x8_t <b><b>vrshlq_u16</b></b> (uint16x8_t a, int16x8_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_u32" type="checkbox"><label for="vrshl_u32"><div>uint32x2_t <b><b>vrshl_u32</b></b> (uint32x2_t a, int32x2_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_u32" type="checkbox"><label for="vrshlq_u32"><div>uint32x4_t <b><b>vrshlq_u32</b></b> (uint32x4_t a, int32x4_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshl_u64" type="checkbox"><label for="vrshl_u64"><div>uint64x1_t <b><b>vrshl_u64</b></b> (uint64x1_t a, int64x1_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshlq_u64" type="checkbox"><label for="vrshlq_u64"><div>uint64x2_t <b><b>vrshlq_u64</b></b> (uint64x2_t a, int64x2_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshld_s64" type="checkbox"><label for="vrshld_s64"><div>int64_t <b><b>vrshld_s64</b></b> (int64_t a, int64_t b)<span class="right">Signed rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshl-signed-rounding-shift-left-register">SRSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshld_u64" type="checkbox"><label for="vrshld_u64"><div>uint64_t <b><b>vrshld_u64</b></b> (uint64_t a, int64_t b)<span class="right">Unsigned rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshl-unsigned-rounding-shift-left-register">URSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_s8" type="checkbox"><label for="vqrshl_s8"><div>int8x8_t <b><b>vqrshl_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_s8" type="checkbox"><label for="vqrshlq_s8"><div>int8x16_t <b><b>vqrshlq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_s16" type="checkbox"><label for="vqrshl_s16"><div>int16x4_t <b><b>vqrshl_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_s16" type="checkbox"><label for="vqrshlq_s16"><div>int16x8_t <b><b>vqrshlq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_s32" type="checkbox"><label for="vqrshl_s32"><div>int32x2_t <b><b>vqrshl_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_s32" type="checkbox"><label for="vqrshlq_s32"><div>int32x4_t <b><b>vqrshlq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_s64" type="checkbox"><label for="vqrshl_s64"><div>int64x1_t <b><b>vqrshl_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_s64" type="checkbox"><label for="vqrshlq_s64"><div>int64x2_t <b><b>vqrshlq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_u8" type="checkbox"><label for="vqrshl_u8"><div>uint8x8_t <b><b>vqrshl_u8</b></b> (uint8x8_t a, int8x8_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_u8" type="checkbox"><label for="vqrshlq_u8"><div>uint8x16_t <b><b>vqrshlq_u8</b></b> (uint8x16_t a, int8x16_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_u16" type="checkbox"><label for="vqrshl_u16"><div>uint16x4_t <b><b>vqrshl_u16</b></b> (uint16x4_t a, int16x4_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_u16" type="checkbox"><label for="vqrshlq_u16"><div>uint16x8_t <b><b>vqrshlq_u16</b></b> (uint16x8_t a, int16x8_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_u32" type="checkbox"><label for="vqrshl_u32"><div>uint32x2_t <b><b>vqrshl_u32</b></b> (uint32x2_t a, int32x2_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_u32" type="checkbox"><label for="vqrshlq_u32"><div>uint32x4_t <b><b>vqrshlq_u32</b></b> (uint32x4_t a, int32x4_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshl_u64" type="checkbox"><label for="vqrshl_u64"><div>uint64x1_t <b><b>vqrshl_u64</b></b> (uint64x1_t a, int64x1_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlq_u64" type="checkbox"><label for="vqrshlq_u64"><div>uint64x2_t <b><b>vqrshlq_u64</b></b> (uint64x2_t a, int64x2_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlb_s8" type="checkbox"><label for="vqrshlb_s8"><div>int8_t <b><b>vqrshlb_s8</b></b> (int8_t a, int8_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlh_s16" type="checkbox"><label for="vqrshlh_s16"><div>int16_t <b><b>vqrshlh_s16</b></b> (int16_t a, int16_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshls_s32" type="checkbox"><label for="vqrshls_s32"><div>int32_t <b><b>vqrshls_s32</b></b> (int32_t a, int32_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshld_s64" type="checkbox"><label for="vqrshld_s64"><div>int64_t <b><b>vqrshld_s64</b></b> (int64_t a, int64_t b)<span class="right">Signed saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&amp;FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshl-signed-saturating-rounding-shift-left-register">SQRSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(-shift-1) for right shift
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlb_u8" type="checkbox"><label for="vqrshlb_u8"><div>uint8_t <b><b>vqrshlb_u8</b></b> (uint8_t a, int8_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Bd,Bn,Bm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+b &rarr; Bm </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshlh_u16" type="checkbox"><label for="vqrshlh_u16"><div>uint16_t <b><b>vqrshlh_u16</b></b> (uint16_t a, int16_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Hd,Hn,Hm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+b &rarr; Hm </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshls_u32" type="checkbox"><label for="vqrshls_u32"><div>uint32_t <b><b>vqrshls_u32</b></b> (uint32_t a, int32_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshld_u64" type="checkbox"><label for="vqrshld_u64"><div>uint64_t <b><b>vqrshld_u64</b></b> (uint64_t a, int64_t b)<span class="right">Unsigned saturating rounding shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&amp;FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshl-unsigned-saturating-rounding-shift-left-register">UQRSHL</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_s8" type="checkbox"><label for="vshr_n_s8"><div>int8x8_t <b><b>vshr_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_s8" type="checkbox"><label for="vshrq_n_s8"><div>int8x16_t <b><b>vshrq_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_s16" type="checkbox"><label for="vshr_n_s16"><div>int16x4_t <b><b>vshr_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_s16" type="checkbox"><label for="vshrq_n_s16"><div>int16x8_t <b><b>vshrq_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_s32" type="checkbox"><label for="vshr_n_s32"><div>int32x2_t <b><b>vshr_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_s32" type="checkbox"><label for="vshrq_n_s32"><div>int32x4_t <b><b>vshrq_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_s64" type="checkbox"><label for="vshr_n_s64"><div>int64x1_t <b><b>vshr_n_s64</b></b> (int64x1_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_s64" type="checkbox"><label for="vshrq_n_s64"><div>int64x2_t <b><b>vshrq_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_u8" type="checkbox"><label for="vshr_n_u8"><div>uint8x8_t <b><b>vshr_n_u8</b></b> (uint8x8_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_u8" type="checkbox"><label for="vshrq_n_u8"><div>uint8x16_t <b><b>vshrq_n_u8</b></b> (uint8x16_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_u16" type="checkbox"><label for="vshr_n_u16"><div>uint16x4_t <b><b>vshr_n_u16</b></b> (uint16x4_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_u16" type="checkbox"><label for="vshrq_n_u16"><div>uint16x8_t <b><b>vshrq_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_u32" type="checkbox"><label for="vshr_n_u32"><div>uint32x2_t <b><b>vshr_n_u32</b></b> (uint32x2_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_u32" type="checkbox"><label for="vshrq_n_u32"><div>uint32x4_t <b><b>vshrq_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshr_n_u64" type="checkbox"><label for="vshr_n_u64"><div>uint64x1_t <b><b>vshr_n_u64</b></b> (uint64x1_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrq_n_u64" type="checkbox"><label for="vshrq_n_u64"><div>uint64x2_t <b><b>vshrq_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrd_n_s64" type="checkbox"><label for="vshrd_n_s64"><div>int64_t <b><b>vshrd_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshr-signed-shift-right-immediate">SSHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrd_n_u64" type="checkbox"><label for="vshrd_n_u64"><div>uint64_t <b><b>vshrd_n_u64</b></b> (uint64_t a, const int n)<span class="right">Unsigned shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushr-unsigned-shift-right-immediate">USHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_s8" type="checkbox"><label for="vshl_n_s8"><div>int8x8_t <b><b>vshl_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_s8" type="checkbox"><label for="vshlq_n_s8"><div>int8x16_t <b><b>vshlq_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_s16" type="checkbox"><label for="vshl_n_s16"><div>int16x4_t <b><b>vshl_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_s16" type="checkbox"><label for="vshlq_n_s16"><div>int16x8_t <b><b>vshlq_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_s32" type="checkbox"><label for="vshl_n_s32"><div>int32x2_t <b><b>vshl_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_s32" type="checkbox"><label for="vshlq_n_s32"><div>int32x4_t <b><b>vshlq_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_s64" type="checkbox"><label for="vshl_n_s64"><div>int64x1_t <b><b>vshl_n_s64</b></b> (int64x1_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_s64" type="checkbox"><label for="vshlq_n_s64"><div>int64x2_t <b><b>vshlq_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_u8" type="checkbox"><label for="vshl_n_u8"><div>uint8x8_t <b><b>vshl_n_u8</b></b> (uint8x8_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_u8" type="checkbox"><label for="vshlq_n_u8"><div>uint8x16_t <b><b>vshlq_n_u8</b></b> (uint8x16_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_u16" type="checkbox"><label for="vshl_n_u16"><div>uint16x4_t <b><b>vshl_n_u16</b></b> (uint16x4_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_u16" type="checkbox"><label for="vshlq_n_u16"><div>uint16x8_t <b><b>vshlq_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_u32" type="checkbox"><label for="vshl_n_u32"><div>uint32x2_t <b><b>vshl_n_u32</b></b> (uint32x2_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_u32" type="checkbox"><label for="vshlq_n_u32"><div>uint32x4_t <b><b>vshlq_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshl_n_u64" type="checkbox"><label for="vshl_n_u64"><div>uint64x1_t <b><b>vshl_n_u64</b></b> (uint64x1_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshlq_n_u64" type="checkbox"><label for="vshlq_n_u64"><div>uint64x2_t <b><b>vshlq_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshld_n_s64" type="checkbox"><label for="vshld_n_s64"><div>int64_t <b><b>vshld_n_s64</b></b> (int64_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshld_n_u64" type="checkbox"><label for="vshld_n_u64"><div>uint64_t <b><b>vshld_n_u64</b></b> (uint64_t a, const int n)<span class="right">Shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shl-shift-left-immediate">SHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_s8" type="checkbox"><label for="vrshr_n_s8"><div>int8x8_t <b><b>vrshr_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_s8" type="checkbox"><label for="vrshrq_n_s8"><div>int8x16_t <b><b>vrshrq_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_s16" type="checkbox"><label for="vrshr_n_s16"><div>int16x4_t <b><b>vrshr_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_s16" type="checkbox"><label for="vrshrq_n_s16"><div>int16x8_t <b><b>vrshrq_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_s32" type="checkbox"><label for="vrshr_n_s32"><div>int32x2_t <b><b>vrshr_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_s32" type="checkbox"><label for="vrshrq_n_s32"><div>int32x4_t <b><b>vrshrq_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_s64" type="checkbox"><label for="vrshr_n_s64"><div>int64x1_t <b><b>vrshr_n_s64</b></b> (int64x1_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_s64" type="checkbox"><label for="vrshrq_n_s64"><div>int64x2_t <b><b>vrshrq_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_u8" type="checkbox"><label for="vrshr_n_u8"><div>uint8x8_t <b><b>vrshr_n_u8</b></b> (uint8x8_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_u8" type="checkbox"><label for="vrshrq_n_u8"><div>uint8x16_t <b><b>vrshrq_n_u8</b></b> (uint8x16_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_u16" type="checkbox"><label for="vrshr_n_u16"><div>uint16x4_t <b><b>vrshr_n_u16</b></b> (uint16x4_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_u16" type="checkbox"><label for="vrshrq_n_u16"><div>uint16x8_t <b><b>vrshrq_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_u32" type="checkbox"><label for="vrshr_n_u32"><div>uint32x2_t <b><b>vrshr_n_u32</b></b> (uint32x2_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_u32" type="checkbox"><label for="vrshrq_n_u32"><div>uint32x4_t <b><b>vrshrq_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshr_n_u64" type="checkbox"><label for="vrshr_n_u64"><div>uint64x1_t <b><b>vrshr_n_u64</b></b> (uint64x1_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrq_n_u64" type="checkbox"><label for="vrshrq_n_u64"><div>uint64x2_t <b><b>vrshrq_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrd_n_s64" type="checkbox"><label for="vrshrd_n_s64"><div>int64_t <b><b>vrshrd_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srshr-signed-rounding-shift-right-immediate">SRSHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrd_n_u64" type="checkbox"><label for="vrshrd_n_u64"><div>uint64_t <b><b>vrshrd_n_u64</b></b> (uint64_t a, const int n)<span class="right">Unsigned rounding shift right</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USHR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urshr-unsigned-rounding-shift-right-immediate">URSHR</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_s8" type="checkbox"><label for="vsra_n_s8"><div>int8x8_t <b><b>vsra_n_s8</b></b> (int8x8_t a, int8x8_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_s8" type="checkbox"><label for="vsraq_n_s8"><div>int8x16_t <b><b>vsraq_n_s8</b></b> (int8x16_t a, int8x16_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_s16" type="checkbox"><label for="vsra_n_s16"><div>int16x4_t <b><b>vsra_n_s16</b></b> (int16x4_t a, int16x4_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_s16" type="checkbox"><label for="vsraq_n_s16"><div>int16x8_t <b><b>vsraq_n_s16</b></b> (int16x8_t a, int16x8_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_s32" type="checkbox"><label for="vsra_n_s32"><div>int32x2_t <b><b>vsra_n_s32</b></b> (int32x2_t a, int32x2_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+1 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_s32" type="checkbox"><label for="vsraq_n_s32"><div>int32x4_t <b><b>vsraq_n_s32</b></b> (int32x4_t a, int32x4_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+1 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_s64" type="checkbox"><label for="vsra_n_s64"><div>int64x1_t <b><b>vsra_n_s64</b></b> (int64x1_t a, int64x1_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_s64" type="checkbox"><label for="vsraq_n_s64"><div>int64x2_t <b><b>vsraq_n_s64</b></b> (int64x2_t a, int64x2_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_u8" type="checkbox"><label for="vsra_n_u8"><div>uint8x8_t <b><b>vsra_n_u8</b></b> (uint8x8_t a, uint8x8_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_u8" type="checkbox"><label for="vsraq_n_u8"><div>uint8x16_t <b><b>vsraq_n_u8</b></b> (uint8x16_t a, uint8x16_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_u16" type="checkbox"><label for="vsra_n_u16"><div>uint16x4_t <b><b>vsra_n_u16</b></b> (uint16x4_t a, uint16x4_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_u16" type="checkbox"><label for="vsraq_n_u16"><div>uint16x8_t <b><b>vsraq_n_u16</b></b> (uint16x8_t a, uint16x8_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_u32" type="checkbox"><label for="vsra_n_u32"><div>uint32x2_t <b><b>vsra_n_u32</b></b> (uint32x2_t a, uint32x2_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_u32" type="checkbox"><label for="vsraq_n_u32"><div>uint32x4_t <b><b>vsraq_n_u32</b></b> (uint32x4_t a, uint32x4_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsra_n_u64" type="checkbox"><label for="vsra_n_u64"><div>uint64x1_t <b><b>vsra_n_u64</b></b> (uint64x1_t a, uint64x1_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsraq_n_u64" type="checkbox"><label for="vsraq_n_u64"><div>uint64x2_t <b><b>vsraq_n_u64</b></b> (uint64x2_t a, uint64x2_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsrad_n_s64" type="checkbox"><label for="vsrad_n_s64"><div>int64_t <b><b>vsrad_n_s64</b></b> (int64_t a, int64_t b, const int n)<span class="right">Signed shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SRSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ssra-signed-shift-right-and-accumulate-immediate">SSRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsrad_n_u64" type="checkbox"><label for="vsrad_n_u64"><div>uint64_t <b><b>vsrad_n_u64</b></b> (uint64_t a, uint64_t b, const int n)<span class="right">Unsigned shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">URSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/usra-unsigned-shift-right-and-accumulate-immediate">USRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_s8" type="checkbox"><label for="vrsra_n_s8"><div>int8x8_t <b><b>vrsra_n_s8</b></b> (int8x8_t a, int8x8_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_s8" type="checkbox"><label for="vrsraq_n_s8"><div>int8x16_t <b><b>vrsraq_n_s8</b></b> (int8x16_t a, int8x16_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_s16" type="checkbox"><label for="vrsra_n_s16"><div>int16x4_t <b><b>vrsra_n_s16</b></b> (int16x4_t a, int16x4_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_s16" type="checkbox"><label for="vrsraq_n_s16"><div>int16x8_t <b><b>vrsraq_n_s16</b></b> (int16x8_t a, int16x8_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_s32" type="checkbox"><label for="vrsra_n_s32"><div>int32x2_t <b><b>vrsra_n_s32</b></b> (int32x2_t a, int32x2_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_s32" type="checkbox"><label for="vrsraq_n_s32"><div>int32x4_t <b><b>vrsraq_n_s32</b></b> (int32x4_t a, int32x4_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_s64" type="checkbox"><label for="vrsra_n_s64"><div>int64x1_t <b><b>vrsra_n_s64</b></b> (int64x1_t a, int64x1_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_s64" type="checkbox"><label for="vrsraq_n_s64"><div>int64x2_t <b><b>vrsraq_n_s64</b></b> (int64x2_t a, int64x2_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_u8" type="checkbox"><label for="vrsra_n_u8"><div>uint8x8_t <b><b>vrsra_n_u8</b></b> (uint8x8_t a, uint8x8_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_u8" type="checkbox"><label for="vrsraq_n_u8"><div>uint8x16_t <b><b>vrsraq_n_u8</b></b> (uint8x16_t a, uint8x16_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_u16" type="checkbox"><label for="vrsra_n_u16"><div>uint16x4_t <b><b>vrsra_n_u16</b></b> (uint16x4_t a, uint16x4_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_u16" type="checkbox"><label for="vrsraq_n_u16"><div>uint16x8_t <b><b>vrsraq_n_u16</b></b> (uint16x8_t a, uint16x8_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_u32" type="checkbox"><label for="vrsra_n_u32"><div>uint32x2_t <b><b>vrsra_n_u32</b></b> (uint32x2_t a, uint32x2_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_u32" type="checkbox"><label for="vrsraq_n_u32"><div>uint32x4_t <b><b>vrsraq_n_u32</b></b> (uint32x4_t a, uint32x4_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsra_n_u64" type="checkbox"><label for="vrsra_n_u64"><div>uint64x1_t <b><b>vrsra_n_u64</b></b> (uint64x1_t a, uint64x1_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsraq_n_u64" type="checkbox"><label for="vrsraq_n_u64"><div>uint64x2_t <b><b>vrsraq_n_u64</b></b> (uint64x2_t a, uint64x2_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsrad_n_s64" type="checkbox"><label for="vrsrad_n_s64"><div>int64_t <b><b>vrsrad_n_s64</b></b> (int64_t a, int64_t b, const int n)<span class="right">Signed rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SSRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/srsra-signed-rounding-shift-right-and-accumulate-immediate">SRSRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsrad_n_u64" type="checkbox"><label for="vrsrad_n_u64"><div>uint64_t <b><b>vrsrad_n_u64</b></b> (uint64_t a, uint64_t b, const int n)<span class="right">Unsigned rounding shift right and accumulate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">USRA</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursra-unsigned-rounding-shift-right-and-accumulate-immediate">URSRA</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] + element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_s8" type="checkbox"><label for="vqshl_n_s8"><div>int8x8_t <b><b>vqshl_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_s8" type="checkbox"><label for="vqshlq_n_s8"><div>int8x16_t <b><b>vqshlq_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_s16" type="checkbox"><label for="vqshl_n_s16"><div>int16x4_t <b><b>vqshl_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_s16" type="checkbox"><label for="vqshlq_n_s16"><div>int16x8_t <b><b>vqshlq_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_s32" type="checkbox"><label for="vqshl_n_s32"><div>int32x2_t <b><b>vqshl_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_s32" type="checkbox"><label for="vqshlq_n_s32"><div>int32x4_t <b><b>vqshlq_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_s64" type="checkbox"><label for="vqshl_n_s64"><div>int64x1_t <b><b>vqshl_n_s64</b></b> (int64x1_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_s64" type="checkbox"><label for="vqshlq_n_s64"><div>int64x2_t <b><b>vqshlq_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_u8" type="checkbox"><label for="vqshl_n_u8"><div>uint8x8_t <b><b>vqshl_n_u8</b></b> (uint8x8_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_u8" type="checkbox"><label for="vqshlq_n_u8"><div>uint8x16_t <b><b>vqshlq_n_u8</b></b> (uint8x16_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_u16" type="checkbox"><label for="vqshl_n_u16"><div>uint16x4_t <b><b>vqshl_n_u16</b></b> (uint16x4_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_u16" type="checkbox"><label for="vqshlq_n_u16"><div>uint16x8_t <b><b>vqshlq_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_u32" type="checkbox"><label for="vqshl_n_u32"><div>uint32x2_t <b><b>vqshl_n_u32</b></b> (uint32x2_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_u32" type="checkbox"><label for="vqshlq_n_u32"><div>uint32x4_t <b><b>vqshlq_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshl_n_u64" type="checkbox"><label for="vqshl_n_u64"><div>uint64x1_t <b><b>vqshl_n_u64</b></b> (uint64x1_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlq_n_u64" type="checkbox"><label for="vqshlq_n_u64"><div>uint64x2_t <b><b>vqshlq_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlb_n_s8" type="checkbox"><label for="vqshlb_n_s8"><div>int8_t <b><b>vqshlb_n_s8</b></b> (int8_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Bd,Bn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+0 &le; n &le; 7 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlh_n_s16" type="checkbox"><label for="vqshlh_n_s16"><div>int16_t <b><b>vqshlh_n_s16</b></b> (int16_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Hd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+0 &le; n &le; 15 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshls_n_s32" type="checkbox"><label for="vqshls_n_s32"><div>int32_t <b><b>vqshls_n_s32</b></b> (int32_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshld_n_s64" type="checkbox"><label for="vqshld_n_s64"><div>int64_t <b><b>vqshld_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshl-register-signed-saturating-shift-left-register">SQSHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlb_n_u8" type="checkbox"><label for="vqshlb_n_u8"><div>uint8_t <b><b>vqshlb_n_u8</b></b> (uint8_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Bd,Bn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlh_n_u16" type="checkbox"><label for="vqshlh_n_u16"><div>uint16_t <b><b>vqshlh_n_u16</b></b> (uint16_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Hd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshls_n_u32" type="checkbox"><label for="vqshls_n_u32"><div>uint32_t <b><b>vqshls_n_u32</b></b> (uint32_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshld_n_u64" type="checkbox"><label for="vqshld_n_u64"><div>uint64_t <b><b>vqshld_n_u64</b></b> (uint64_t a, const int n)<span class="right">Unsigned saturating shift left</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&amp;FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshl-register-unsigned-saturating-shift-left-register">UQSHL</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]&lt;7:0&gt;);
+    if rounding then
+        round_const = 1 &lt;&lt; (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned) + round_const) &lt;&lt; shift;
+    if saturating then
+        (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlu_n_s8" type="checkbox"><label for="vqshlu_n_s8"><div>uint8x8_t <b><b>vqshlu_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshluq_n_s8" type="checkbox"><label for="vqshluq_n_s8"><div>uint8x16_t <b><b>vqshluq_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlu_n_s16" type="checkbox"><label for="vqshlu_n_s16"><div>uint16x4_t <b><b>vqshlu_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshluq_n_s16" type="checkbox"><label for="vqshluq_n_s16"><div>uint16x8_t <b><b>vqshluq_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlu_n_s32" type="checkbox"><label for="vqshlu_n_s32"><div>uint32x2_t <b><b>vqshlu_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshluq_n_s32" type="checkbox"><label for="vqshluq_n_s32"><div>uint32x4_t <b><b>vqshluq_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlu_n_s64" type="checkbox"><label for="vqshlu_n_s64"><div>uint64x1_t <b><b>vqshlu_n_s64</b></b> (int64x1_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshluq_n_s64" type="checkbox"><label for="vqshluq_n_s64"><div>uint64x2_t <b><b>vqshluq_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshlub_n_s8" type="checkbox"><label for="vqshlub_n_s8"><div>uint8_t <b><b>vqshlub_n_s8</b></b> (int8_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Bd,Bn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn <br />
+0 &le; n &le; 7 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshluh_n_s16" type="checkbox"><label for="vqshluh_n_s16"><div>uint16_t <b><b>vqshluh_n_s16</b></b> (int16_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Hd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+0 &le; n &le; 15 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlus_n_s32" type="checkbox"><label for="vqshlus_n_s32"><div>uint32_t <b><b>vqshlus_n_s32</b></b> (int32_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshlud_n_s64" type="checkbox"><label for="vqshlud_n_s64"><div>uint64_t <b><b>vqshlud_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed saturating shift left unsigned</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHL</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshlu-signed-saturating-shift-left-unsigned-immediate">SQSHLU</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], src_unsigned) &lt;&lt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_n_s16" type="checkbox"><label for="vshrn_n_s16"><div>int8x8_t <b><b>vshrn_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_n_s32" type="checkbox"><label for="vshrn_n_s32"><div>int16x4_t <b><b>vshrn_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_n_s64" type="checkbox"><label for="vshrn_n_s64"><div>int32x2_t <b><b>vshrn_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_n_u16" type="checkbox"><label for="vshrn_n_u16"><div>uint8x8_t <b><b>vshrn_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_n_u32" type="checkbox"><label for="vshrn_n_u32"><div>uint16x4_t <b><b>vshrn_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_n_u64" type="checkbox"><label for="vshrn_n_u64"><div>uint32x2_t <b><b>vshrn_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_high_n_s16" type="checkbox"><label for="vshrn_high_n_s16"><div>int8x16_t <b><b>vshrn_high_n_s16</b></b> (int8x8_t r, int16x8_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_high_n_s32" type="checkbox"><label for="vshrn_high_n_s32"><div>int16x8_t <b><b>vshrn_high_n_s32</b></b> (int16x4_t r, int32x4_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_high_n_s64" type="checkbox"><label for="vshrn_high_n_s64"><div>int32x4_t <b><b>vshrn_high_n_s64</b></b> (int32x2_t r, int64x2_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_high_n_u16" type="checkbox"><label for="vshrn_high_n_u16"><div>uint8x16_t <b><b>vshrn_high_n_u16</b></b> (uint8x8_t r, uint16x8_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_high_n_u32" type="checkbox"><label for="vshrn_high_n_u32"><div>uint16x8_t <b><b>vshrn_high_n_u32</b></b> (uint16x4_t r, uint32x4_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshrn_high_n_u64" type="checkbox"><label for="vshrn_high_n_u64"><div>uint32x4_t <b><b>vshrn_high_n_u64</b></b> (uint32x2_t r, uint64x2_t a, const int n)<span class="right">Shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">RSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shrn-shrn2-shift-right-narrow-immediate">SHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrun_n_s16" type="checkbox"><label for="vqshrun_n_s16"><div>uint8x8_t <b><b>vqshrun_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrun_n_s32" type="checkbox"><label for="vqshrun_n_s32"><div>uint16x4_t <b><b>vqshrun_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrun_n_s64" type="checkbox"><label for="vqshrun_n_s64"><div>uint32x2_t <b><b>vqshrun_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrunh_n_s16" type="checkbox"><label for="vqshrunh_n_s16"><div>uint8_t <b><b>vqshrunh_n_s16</b></b> (int16_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN</a> Bd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshruns_n_s32" type="checkbox"><label for="vqshruns_n_s32"><div>uint16_t <b><b>vqshruns_n_s32</b></b> (int32_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN</a> Hd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrund_n_s64" type="checkbox"><label for="vqshrund_n_s64"><div>uint32_t <b><b>vqshrund_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN</a> Sd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrun_high_n_s16" type="checkbox"><label for="vqshrun_high_n_s16"><div>uint8x16_t <b><b>vqshrun_high_n_s16</b></b> (uint8x8_t r, int16x8_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrun_high_n_s32" type="checkbox"><label for="vqshrun_high_n_s32"><div>uint16x8_t <b><b>vqshrun_high_n_s32</b></b> (uint16x4_t r, int32x4_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrun_high_n_s64" type="checkbox"><label for="vqshrun_high_n_s64"><div>uint32x4_t <b><b>vqshrun_high_n_s64</b></b> (uint32x2_t r, int64x2_t a, const int n)<span class="right">Signed saturating shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrun-sqshrun2-signed-saturating-shift-right-unsigned-narrow-immediate">SQSHRUN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrun_n_s16" type="checkbox"><label for="vqrshrun_n_s16"><div>uint8x8_t <b><b>vqrshrun_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrun_n_s32" type="checkbox"><label for="vqrshrun_n_s32"><div>uint16x4_t <b><b>vqrshrun_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrun_n_s64" type="checkbox"><label for="vqrshrun_n_s64"><div>uint32x2_t <b><b>vqrshrun_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrunh_n_s16" type="checkbox"><label for="vqrshrunh_n_s16"><div>uint8_t <b><b>vqrshrunh_n_s16</b></b> (int16_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN</a> Bd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshruns_n_s32" type="checkbox"><label for="vqrshruns_n_s32"><div>uint16_t <b><b>vqrshruns_n_s32</b></b> (int32_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN</a> Hd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrund_n_s64" type="checkbox"><label for="vqrshrund_n_s64"><div>uint32_t <b><b>vqrshrund_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN</a> Sd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrun_high_n_s16" type="checkbox"><label for="vqrshrun_high_n_s16"><div>uint8x16_t <b><b>vqrshrun_high_n_s16</b></b> (uint8x8_t r, int16x8_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrun_high_n_s32" type="checkbox"><label for="vqrshrun_high_n_s32"><div>uint16x8_t <b><b>vqrshrun_high_n_s32</b></b> (uint16x4_t r, int32x4_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrun_high_n_s64" type="checkbox"><label for="vqrshrun_high_n_s64"><div>uint32x4_t <b><b>vqrshrun_high_n_s64</b></b> (uint32x2_t r, int64x2_t a, const int n)<span class="right">Signed saturating rounded shift right unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&amp;FP register. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRUN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrun-sqrshrun2-signed-saturating-rounded-shift-right-unsigned-narrow-immediate">SQRSHRUN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_n_s16" type="checkbox"><label for="vqshrn_n_s16"><div>int8x8_t <b><b>vqshrn_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_n_s32" type="checkbox"><label for="vqshrn_n_s32"><div>int16x4_t <b><b>vqshrn_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_n_s64" type="checkbox"><label for="vqshrn_n_s64"><div>int32x2_t <b><b>vqshrn_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_n_u16" type="checkbox"><label for="vqshrn_n_u16"><div>uint8x8_t <b><b>vqshrn_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_n_u32" type="checkbox"><label for="vqshrn_n_u32"><div>uint16x4_t <b><b>vqshrn_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_n_u64" type="checkbox"><label for="vqshrn_n_u64"><div>uint32x2_t <b><b>vqshrn_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqshrnh_n_s16" type="checkbox"><label for="vqshrnh_n_s16"><div>int8_t <b><b>vqshrnh_n_s16</b></b> (int16_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN</a> Bd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrns_n_s32" type="checkbox"><label for="vqshrns_n_s32"><div>int16_t <b><b>vqshrns_n_s32</b></b> (int32_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN</a> Hd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrnd_n_s64" type="checkbox"><label for="vqshrnd_n_s64"><div>int32_t <b><b>vqshrnd_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN</a> Sd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrnh_n_u16" type="checkbox"><label for="vqshrnh_n_u16"><div>uint8_t <b><b>vqshrnh_n_u16</b></b> (uint16_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN</a> Bd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrns_n_u32" type="checkbox"><label for="vqshrns_n_u32"><div>uint16_t <b><b>vqshrns_n_u32</b></b> (uint32_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN</a> Hd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrnd_n_u64" type="checkbox"><label for="vqshrnd_n_u64"><div>uint32_t <b><b>vqshrnd_n_u64</b></b> (uint64_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN</a> Sd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_high_n_s16" type="checkbox"><label for="vqshrn_high_n_s16"><div>int8x16_t <b><b>vqshrn_high_n_s16</b></b> (int8x8_t r, int16x8_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_high_n_s32" type="checkbox"><label for="vqshrn_high_n_s32"><div>int16x8_t <b><b>vqshrn_high_n_s32</b></b> (int16x4_t r, int32x4_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_high_n_s64" type="checkbox"><label for="vqshrn_high_n_s64"><div>int32x4_t <b><b>vqshrn_high_n_s64</b></b> (int32x2_t r, int64x2_t a, const int n)<span class="right">Signed saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqshrn-sqshrn2-signed-saturating-shift-right-narrow-immediate">SQSHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_high_n_u16" type="checkbox"><label for="vqshrn_high_n_u16"><div>uint8x16_t <b><b>vqshrn_high_n_u16</b></b> (uint8x8_t r, uint16x8_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_high_n_u32" type="checkbox"><label for="vqshrn_high_n_u32"><div>uint16x8_t <b><b>vqshrn_high_n_u32</b></b> (uint16x4_t r, uint32x4_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqshrn_high_n_u64" type="checkbox"><label for="vqshrn_high_n_u64"><div>uint32x4_t <b><b>vqshrn_high_n_u64</b></b> (uint32x2_t r, uint64x2_t a, const int n)<span class="right">Unsigned saturating shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQRSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqshrn-uqshrn2-unsigned-saturating-shift-right-narrow-immediate">UQSHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_n_s16" type="checkbox"><label for="vrshrn_n_s16"><div>int8x8_t <b><b>vrshrn_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_n_s32" type="checkbox"><label for="vrshrn_n_s32"><div>int16x4_t <b><b>vrshrn_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_n_s64" type="checkbox"><label for="vrshrn_n_s64"><div>int32x2_t <b><b>vrshrn_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_n_u16" type="checkbox"><label for="vrshrn_n_u16"><div>uint8x8_t <b><b>vrshrn_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_n_u32" type="checkbox"><label for="vrshrn_n_u32"><div>uint16x4_t <b><b>vrshrn_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_n_u64" type="checkbox"><label for="vrshrn_n_u64"><div>uint32x2_t <b><b>vrshrn_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_high_n_s16" type="checkbox"><label for="vrshrn_high_n_s16"><div>int8x16_t <b><b>vrshrn_high_n_s16</b></b> (int8x8_t r, int16x8_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_high_n_s32" type="checkbox"><label for="vrshrn_high_n_s32"><div>int16x8_t <b><b>vrshrn_high_n_s32</b></b> (int16x4_t r, int32x4_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_high_n_s64" type="checkbox"><label for="vrshrn_high_n_s64"><div>int32x4_t <b><b>vrshrn_high_n_s64</b></b> (int32x2_t r, int64x2_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_high_n_u16" type="checkbox"><label for="vrshrn_high_n_u16"><div>uint8x16_t <b><b>vrshrn_high_n_u16</b></b> (uint8x8_t r, uint16x8_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_high_n_u32" type="checkbox"><label for="vrshrn_high_n_u32"><div>uint16x8_t <b><b>vrshrn_high_n_u32</b></b> (uint16x4_t r, uint32x4_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrshrn_high_n_u64" type="checkbox"><label for="vrshrn_high_n_u64"><div>uint32x4_t <b><b>vrshrn_high_n_u64</b></b> (uint32x2_t r, uint64x2_t a, const int n)<span class="right">Rounding shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&amp;FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rshrn-rshrn2-rounding-shift-right-narrow-immediate">RSHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize]) + round_const) &gt;&gt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_n_s16" type="checkbox"><label for="vqrshrn_n_s16"><div>int8x8_t <b><b>vqrshrn_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_n_s32" type="checkbox"><label for="vqrshrn_n_s32"><div>int16x4_t <b><b>vqrshrn_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_n_s64" type="checkbox"><label for="vqrshrn_n_s64"><div>int32x2_t <b><b>vqrshrn_n_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_n_u16" type="checkbox"><label for="vqrshrn_n_u16"><div>uint8x8_t <b><b>vqrshrn_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN</a> Vd.8B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_n_u32" type="checkbox"><label for="vqrshrn_n_u32"><div>uint16x4_t <b><b>vqrshrn_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN</a> Vd.4H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_n_u64" type="checkbox"><label for="vqrshrn_n_u64"><div>uint32x2_t <b><b>vqrshrn_n_u64</b></b> (uint64x2_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN</a> Vd.2S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrnh_n_s16" type="checkbox"><label for="vqrshrnh_n_s16"><div>int8_t <b><b>vqrshrnh_n_s16</b></b> (int16_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN</a> Bd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrns_n_s32" type="checkbox"><label for="vqrshrns_n_s32"><div>int16_t <b><b>vqrshrns_n_s32</b></b> (int32_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN</a> Hd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrnd_n_s64" type="checkbox"><label for="vqrshrnd_n_s64"><div>int32_t <b><b>vqrshrnd_n_s64</b></b> (int64_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN</a> Sd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrnh_n_u16" type="checkbox"><label for="vqrshrnh_n_u16"><div>uint8_t <b><b>vqrshrnh_n_u16</b></b> (uint16_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN</a> Bd,Hn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrns_n_u32" type="checkbox"><label for="vqrshrns_n_u32"><div>uint16_t <b><b>vqrshrns_n_u32</b></b> (uint32_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN</a> Hd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrnd_n_u64" type="checkbox"><label for="vqrshrnd_n_u64"><div>uint32_t <b><b>vqrshrnd_n_u64</b></b> (uint64_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN</a> Sd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_high_n_s16" type="checkbox"><label for="vqrshrn_high_n_s16"><div>int8x16_t <b><b>vqrshrn_high_n_s16</b></b> (int8x8_t r, int16x8_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_high_n_s32" type="checkbox"><label for="vqrshrn_high_n_s32"><div>int16x8_t <b><b>vqrshrn_high_n_s32</b></b> (int16x4_t r, int32x4_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_high_n_s64" type="checkbox"><label for="vqrshrn_high_n_s64"><div>int32x4_t <b><b>vqrshrn_high_n_s64</b></b> (int32x2_t r, int64x2_t a, const int n)<span class="right">Signed saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">SQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrshrn-sqrshrn2-signed-saturating-rounded-shift-right-narrow-immediate">SQRSHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_high_n_u16" type="checkbox"><label for="vqrshrn_high_n_u16"><div>uint8x16_t <b><b>vqrshrn_high_n_u16</b></b> (uint8x8_t r, uint16x8_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN2</a> Vd.16B,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_high_n_u32" type="checkbox"><label for="vqrshrn_high_n_u32"><div>uint16x8_t <b><b>vqrshrn_high_n_u32</b></b> (uint16x4_t r, uint32x4_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN2</a> Vd.8H,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrshrn_high_n_u64" type="checkbox"><label for="vqrshrn_high_n_u64"><div>uint32x4_t <b><b>vqrshrn_high_n_u64</b></b> (uint32x2_t r, uint64x2_t a, const int n)<span class="right">Unsigned saturating rounded shift right narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see <a class="armarm-xref" title="Reference to ARM ARM section">UQSHRN</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqrshrn-uqrshrn2-unsigned-saturating-rounded-shift-right-narrow-immediate">UQRSHRN2</a> Vd.4S,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize*2) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer round_const = if round then (1 &lt;&lt; (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], unsigned) + round_const) &gt;&gt; shift;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_s8" type="checkbox"><label for="vshll_n_s8"><div>int16x8_t <b><b>vshll_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL</a> Vd.8H,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_s16" type="checkbox"><label for="vshll_n_s16"><div>int32x4_t <b><b>vshll_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL</a> Vd.4S,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_s32" type="checkbox"><label for="vshll_n_s32"><div>int64x2_t <b><b>vshll_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL</a> Vd.2D,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_u8" type="checkbox"><label for="vshll_n_u8"><div>uint16x8_t <b><b>vshll_n_u8</b></b> (uint8x8_t a, const int n)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL</a> Vd.8H,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_u16" type="checkbox"><label for="vshll_n_u16"><div>uint32x4_t <b><b>vshll_n_u16</b></b> (uint16x4_t a, const int n)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL</a> Vd.4S,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_u32" type="checkbox"><label for="vshll_n_u32"><div>uint64x2_t <b><b>vshll_n_u32</b></b> (uint32x2_t a, const int n)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL</a> Vd.2D,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_s8" type="checkbox"><label for="vshll_high_n_s8"><div>int16x8_t <b><b>vshll_high_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL2</a> Vd.8H,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_s16" type="checkbox"><label for="vshll_high_n_s16"><div>int32x4_t <b><b>vshll_high_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL2</a> Vd.4S,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_s32" type="checkbox"><label for="vshll_high_n_s32"><div>int64x2_t <b><b>vshll_high_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL2</a> Vd.2D,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_u8" type="checkbox"><label for="vshll_high_n_u8"><div>uint16x8_t <b><b>vshll_high_n_u8</b></b> (uint8x16_t a, const int n)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL2</a> Vd.8H,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_u16" type="checkbox"><label for="vshll_high_n_u16"><div>uint32x4_t <b><b>vshll_high_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL2</a> Vd.4S,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_u32" type="checkbox"><label for="vshll_high_n_u32"><div>uint64x2_t <b><b>vshll_high_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL2</a> Vd.2D,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_s8" type="checkbox"><label for="vshll_n_s8"><div>int16x8_t <b><b>vshll_n_s8</b></b> (int8x8_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL</a> Vd.8H,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+8 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_s16" type="checkbox"><label for="vshll_n_s16"><div>int32x4_t <b><b>vshll_n_s16</b></b> (int16x4_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL</a> Vd.4S,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+16 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_s32" type="checkbox"><label for="vshll_n_s32"><div>int64x2_t <b><b>vshll_n_s32</b></b> (int32x2_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL</a> Vd.2D,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+32 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_u8" type="checkbox"><label for="vshll_n_u8"><div>uint16x8_t <b><b>vshll_n_u8</b></b> (uint8x8_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL</a> Vd.8H,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+8 &lt;= n &lt;= 8 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_u16" type="checkbox"><label for="vshll_n_u16"><div>uint32x4_t <b><b>vshll_n_u16</b></b> (uint16x4_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL</a> Vd.4S,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+16 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_n_u32" type="checkbox"><label for="vshll_n_u32"><div>uint64x2_t <b><b>vshll_n_u32</b></b> (uint32x2_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL</a> Vd.2D,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+32 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_s8" type="checkbox"><label for="vshll_high_n_s8"><div>int16x8_t <b><b>vshll_high_n_s8</b></b> (int8x16_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL2</a> Vd.8H,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+8 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_s16" type="checkbox"><label for="vshll_high_n_s16"><div>int32x4_t <b><b>vshll_high_n_s16</b></b> (int16x8_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL2</a> Vd.4S,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+16 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_s32" type="checkbox"><label for="vshll_high_n_s32"><div>int64x2_t <b><b>vshll_high_n_s32</b></b> (int32x4_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL2</a> Vd.2D,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+32 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_u8" type="checkbox"><label for="vshll_high_n_u8"><div>uint16x8_t <b><b>vshll_high_n_u8</b></b> (uint8x16_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL2</a> Vd.8H,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+8 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_u16" type="checkbox"><label for="vshll_high_n_u16"><div>uint32x4_t <b><b>vshll_high_n_u16</b></b> (uint16x8_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL2</a> Vd.4S,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+16 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vshll_high_n_u32" type="checkbox"><label for="vshll_high_n_u32"><div>uint64x2_t <b><b>vshll_high_n_u32</b></b> (uint32x4_t a, const int n)<span class="right">Shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/shll-shll2-shift-left-long-by-element-size">SHLL2</a> Vd.2D,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+32 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_s8" type="checkbox"><label for="vsri_n_s8"><div>int8x8_t <b><b>vsri_n_s8</b></b> (int8x8_t a, int8x8_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_s8" type="checkbox"><label for="vsriq_n_s8"><div>int8x16_t <b><b>vsriq_n_s8</b></b> (int8x16_t a, int8x16_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_s16" type="checkbox"><label for="vsri_n_s16"><div>int16x4_t <b><b>vsri_n_s16</b></b> (int16x4_t a, int16x4_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_s16" type="checkbox"><label for="vsriq_n_s16"><div>int16x8_t <b><b>vsriq_n_s16</b></b> (int16x8_t a, int16x8_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_s32" type="checkbox"><label for="vsri_n_s32"><div>int32x2_t <b><b>vsri_n_s32</b></b> (int32x2_t a, int32x2_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+1 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_s32" type="checkbox"><label for="vsriq_n_s32"><div>int32x4_t <b><b>vsriq_n_s32</b></b> (int32x4_t a, int32x4_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+1 &le; n &le; 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_s64" type="checkbox"><label for="vsri_n_s64"><div>int64x1_t <b><b>vsri_n_s64</b></b> (int64x1_t a, int64x1_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_s64" type="checkbox"><label for="vsriq_n_s64"><div>int64x2_t <b><b>vsriq_n_s64</b></b> (int64x2_t a, int64x2_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_u8" type="checkbox"><label for="vsri_n_u8"><div>uint8x8_t <b><b>vsri_n_u8</b></b> (uint8x8_t a, uint8x8_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_u8" type="checkbox"><label for="vsriq_n_u8"><div>uint8x16_t <b><b>vsriq_n_u8</b></b> (uint8x16_t a, uint8x16_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_u16" type="checkbox"><label for="vsri_n_u16"><div>uint16x4_t <b><b>vsri_n_u16</b></b> (uint16x4_t a, uint16x4_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_u16" type="checkbox"><label for="vsriq_n_u16"><div>uint16x8_t <b><b>vsriq_n_u16</b></b> (uint16x8_t a, uint16x8_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &lt;= n &lt;= 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_u32" type="checkbox"><label for="vsri_n_u32"><div>uint32x2_t <b><b>vsri_n_u32</b></b> (uint32x2_t a, uint32x2_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_u32" type="checkbox"><label for="vsriq_n_u32"><div>uint32x4_t <b><b>vsriq_n_u32</b></b> (uint32x4_t a, uint32x4_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_u64" type="checkbox"><label for="vsri_n_u64"><div>uint64x1_t <b><b>vsri_n_u64</b></b> (uint64x1_t a, uint64x1_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_u64" type="checkbox"><label for="vsriq_n_u64"><div>uint64x2_t <b><b>vsriq_n_u64</b></b> (uint64x2_t a, uint64x2_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_p64" type="checkbox"><label for="vsri_n_p64"><div>poly64x1_t <b><b>vsri_n_p64</b></b> (poly64x1_t a, poly64x1_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_p64" type="checkbox"><label for="vsriq_n_p64"><div>poly64x2_t <b><b>vsriq_n_p64</b></b> (poly64x2_t a, poly64x2_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_p8" type="checkbox"><label for="vsri_n_p8"><div>poly8x8_t <b><b>vsri_n_p8</b></b> (poly8x8_t a, poly8x8_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_p8" type="checkbox"><label for="vsriq_n_p8"><div>poly8x16_t <b><b>vsriq_n_p8</b></b> (poly8x16_t a, poly8x16_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+1 &le; n &le; 8 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsri_n_p16" type="checkbox"><label for="vsri_n_p16"><div>poly16x4_t <b><b>vsri_n_p16</b></b> (poly16x4_t a, poly16x4_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+1 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsriq_n_p16" type="checkbox"><label for="vsriq_n_p16"><div>poly16x8_t <b><b>vsriq_n_p16</b></b> (poly16x8_t a, poly16x8_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+1 &le; n &le; 16 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsrid_n_s64" type="checkbox"><label for="vsrid_n_s64"><div>int64_t <b><b>vsrid_n_s64</b></b> (int64_t a, int64_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsrid_n_u64" type="checkbox"><label for="vsrid_n_u64"><div>uint64_t <b><b>vsrid_n_u64</b></b> (uint64_t a, uint64_t b, const int n)<span class="right">Shift right and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sri-shift-right-and-insert-immediate">SRI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+1 &le; n &le; 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_s8" type="checkbox"><label for="vsli_n_s8"><div>int8x8_t <b><b>vsli_n_s8</b></b> (int8x8_t a, int8x8_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+0 &le; n &le; 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_s8" type="checkbox"><label for="vsliq_n_s8"><div>int8x16_t <b><b>vsliq_n_s8</b></b> (int8x16_t a, int8x16_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+0 &le; n &le; 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_s16" type="checkbox"><label for="vsli_n_s16"><div>int16x4_t <b><b>vsli_n_s16</b></b> (int16x4_t a, int16x4_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+0 &le; n &le; 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_s16" type="checkbox"><label for="vsliq_n_s16"><div>int16x8_t <b><b>vsliq_n_s16</b></b> (int16x8_t a, int16x8_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+0 &le; n &le; 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_s32" type="checkbox"><label for="vsli_n_s32"><div>int32x2_t <b><b>vsli_n_s32</b></b> (int32x2_t a, int32x2_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_s32" type="checkbox"><label for="vsliq_n_s32"><div>int32x4_t <b><b>vsliq_n_s32</b></b> (int32x4_t a, int32x4_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+0 &le; n &le; 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_s64" type="checkbox"><label for="vsli_n_s64"><div>int64x1_t <b><b>vsli_n_s64</b></b> (int64x1_t a, int64x1_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_s64" type="checkbox"><label for="vsliq_n_s64"><div>int64x2_t <b><b>vsliq_n_s64</b></b> (int64x2_t a, int64x2_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+0 &le; n &le; 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_u8" type="checkbox"><label for="vsli_n_u8"><div>uint8x8_t <b><b>vsli_n_u8</b></b> (uint8x8_t a, uint8x8_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+0 &le; n &le; 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_u8" type="checkbox"><label for="vsliq_n_u8"><div>uint8x16_t <b><b>vsliq_n_u8</b></b> (uint8x16_t a, uint8x16_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_u16" type="checkbox"><label for="vsli_n_u16"><div>uint16x4_t <b><b>vsli_n_u16</b></b> (uint16x4_t a, uint16x4_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_u16" type="checkbox"><label for="vsliq_n_u16"><div>uint16x8_t <b><b>vsliq_n_u16</b></b> (uint16x8_t a, uint16x8_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_u32" type="checkbox"><label for="vsli_n_u32"><div>uint32x2_t <b><b>vsli_n_u32</b></b> (uint32x2_t a, uint32x2_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_u32" type="checkbox"><label for="vsliq_n_u32"><div>uint32x4_t <b><b>vsliq_n_u32</b></b> (uint32x4_t a, uint32x4_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+0 &lt;= n &lt;= 31 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_u64" type="checkbox"><label for="vsli_n_u64"><div>uint64x1_t <b><b>vsli_n_u64</b></b> (uint64x1_t a, uint64x1_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_u64" type="checkbox"><label for="vsliq_n_u64"><div>uint64x2_t <b><b>vsliq_n_u64</b></b> (uint64x2_t a, uint64x2_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_p64" type="checkbox"><label for="vsli_n_p64"><div>poly64x1_t <b><b>vsli_n_p64</b></b> (poly64x1_t a, poly64x1_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_p64" type="checkbox"><label for="vsliq_n_p64"><div>poly64x2_t <b><b>vsliq_n_p64</b></b> (poly64x2_t a, poly64x2_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_p8" type="checkbox"><label for="vsli_n_p8"><div>poly8x8_t <b><b>vsli_n_p8</b></b> (poly8x8_t a, poly8x8_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.8B,Vn.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_p8" type="checkbox"><label for="vsliq_n_p8"><div>poly8x16_t <b><b>vsliq_n_p8</b></b> (poly8x16_t a, poly8x16_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.16B,Vn.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsli_n_p16" type="checkbox"><label for="vsli_n_p16"><div>poly16x4_t <b><b>vsli_n_p16</b></b> (poly16x4_t a, poly16x4_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.4H,Vn.4H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsliq_n_p16" type="checkbox"><label for="vsliq_n_p16"><div>poly16x8_t <b><b>vsliq_n_p16</b></b> (poly16x8_t a, poly16x8_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Vd.8H,Vn.8H,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vslid_n_s64" type="checkbox"><label for="vslid_n_s64"><div>int64_t <b><b>vslid_n_s64</b></b> (int64_t a, int64_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vslid_n_u64" type="checkbox"><label for="vslid_n_u64"><div>uint64_t <b><b>vslid_n_u64</b></b> (uint64_t a, uint64_t b, const int n)<span class="right">Shift left and insert</span></div></label><article>      <h4>Description</h4><p><p class="aml">Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&amp;FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&amp;FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sli-shift-left-and-insert-immediate">SLI</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Dn <br />
+0 &lt;= n &lt;= 63 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) mask = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.1" title="function: bits(N) Ones(integer N)">Ones</a>(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSL.2" title="function: bits(N) LSL(bits(N) x, integer shift)">LSL</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], shift);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize] AND NOT(mask)) OR shifted;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_s32_f32" type="checkbox"><label for="vcvt_s32_f32"><div>int32x2_t <b><b>vcvt_s32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_s32_f32" type="checkbox"><label for="vcvtq_s32_f32"><div>int32x4_t <b><b>vcvtq_s32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_u32_f32" type="checkbox"><label for="vcvt_u32_f32"><div>uint32x2_t <b><b>vcvt_u32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_u32_f32" type="checkbox"><label for="vcvtq_u32_f32"><div>uint32x4_t <b><b>vcvtq_u32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtn_s32_f32" type="checkbox"><label for="vcvtn_s32_f32"><div>int32x2_t <b><b>vcvtn_s32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtns-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtnq_s32_f32" type="checkbox"><label for="vcvtnq_s32_f32"><div>int32x4_t <b><b>vcvtnq_s32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtns-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtn_u32_f32" type="checkbox"><label for="vcvtn_u32_f32"><div>uint32x2_t <b><b>vcvtn_u32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtnu-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNU</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtnq_u32_f32" type="checkbox"><label for="vcvtnq_u32_f32"><div>uint32x4_t <b><b>vcvtnq_u32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtnu-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNU</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtm_s32_f32" type="checkbox"><label for="vcvtm_s32_f32"><div>int32x2_t <b><b>vcvtm_s32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to signed integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtms-vector-floating-point-convert-to-signed-integer-rounding-toward-minus-infinity-vector">FCVTMS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtmq_s32_f32" type="checkbox"><label for="vcvtmq_s32_f32"><div>int32x4_t <b><b>vcvtmq_s32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to signed integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtms-vector-floating-point-convert-to-signed-integer-rounding-toward-minus-infinity-vector">FCVTMS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtm_u32_f32" type="checkbox"><label for="vcvtm_u32_f32"><div>uint32x2_t <b><b>vcvtm_u32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtmu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-minus-infinity-vector">FCVTMU</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtmq_u32_f32" type="checkbox"><label for="vcvtmq_u32_f32"><div>uint32x4_t <b><b>vcvtmq_u32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtmu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-minus-infinity-vector">FCVTMU</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtp_s32_f32" type="checkbox"><label for="vcvtp_s32_f32"><div>int32x2_t <b><b>vcvtp_s32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to signed integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtps-vector-floating-point-convert-to-signed-integer-rounding-toward-plus-infinity-vector">FCVTPS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtpq_s32_f32" type="checkbox"><label for="vcvtpq_s32_f32"><div>int32x4_t <b><b>vcvtpq_s32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to signed integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtps-vector-floating-point-convert-to-signed-integer-rounding-toward-plus-infinity-vector">FCVTPS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtp_u32_f32" type="checkbox"><label for="vcvtp_u32_f32"><div>uint32x2_t <b><b>vcvtp_u32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtpu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-plus-infinity-vector">FCVTPU</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtpq_u32_f32" type="checkbox"><label for="vcvtpq_u32_f32"><div>uint32x4_t <b><b>vcvtpq_u32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtpu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-plus-infinity-vector">FCVTPU</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvta_s32_f32" type="checkbox"><label for="vcvta_s32_f32"><div>int32x2_t <b><b>vcvta_s32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtas-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtaq_s32_f32" type="checkbox"><label for="vcvtaq_s32_f32"><div>int32x4_t <b><b>vcvtaq_s32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtas-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvta_u32_f32" type="checkbox"><label for="vcvta_u32_f32"><div>uint32x2_t <b><b>vcvta_u32_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtau-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAU</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtaq_u32_f32" type="checkbox"><label for="vcvtaq_u32_f32"><div>uint32x4_t <b><b>vcvtaq_u32_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtau-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAU</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_s32_f32" type="checkbox"><label for="vcvts_s32_f32"><div>int32_t <b><b>vcvts_s32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_u32_f32" type="checkbox"><label for="vcvts_u32_f32"><div>uint32_t <b><b>vcvts_u32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtns_s32_f32" type="checkbox"><label for="vcvtns_s32_f32"><div>int32_t <b><b>vcvtns_s32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtns-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNS</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtns_u32_f32" type="checkbox"><label for="vcvtns_u32_f32"><div>uint32_t <b><b>vcvtns_u32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtnu-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNU</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtms_s32_f32" type="checkbox"><label for="vcvtms_s32_f32"><div>int32_t <b><b>vcvtms_s32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to signed integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtms-vector-floating-point-convert-to-signed-integer-rounding-toward-minus-infinity-vector">FCVTMS</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtms_u32_f32" type="checkbox"><label for="vcvtms_u32_f32"><div>uint32_t <b><b>vcvtms_u32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtmu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-minus-infinity-vector">FCVTMU</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtps_s32_f32" type="checkbox"><label for="vcvtps_s32_f32"><div>int32_t <b><b>vcvtps_s32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to signed integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtps-vector-floating-point-convert-to-signed-integer-rounding-toward-plus-infinity-vector">FCVTPS</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtps_u32_f32" type="checkbox"><label for="vcvtps_u32_f32"><div>uint32_t <b><b>vcvtps_u32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtpu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-plus-infinity-vector">FCVTPU</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtas_s32_f32" type="checkbox"><label for="vcvtas_s32_f32"><div>int32_t <b><b>vcvtas_s32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtas-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAS</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtas_u32_f32" type="checkbox"><label for="vcvtas_u32_f32"><div>uint32_t <b><b>vcvtas_u32_f32</b></b> (float32_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtau-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAU</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_s64_f64" type="checkbox"><label for="vcvt_s64_f64"><div>int64x1_t <b><b>vcvt_s64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_s64_f64" type="checkbox"><label for="vcvtq_s64_f64"><div>int64x2_t <b><b>vcvtq_s64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_u64_f64" type="checkbox"><label for="vcvt_u64_f64"><div>uint64x1_t <b><b>vcvt_u64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_u64_f64" type="checkbox"><label for="vcvtq_u64_f64"><div>uint64x2_t <b><b>vcvtq_u64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtn_s64_f64" type="checkbox"><label for="vcvtn_s64_f64"><div>int64x1_t <b><b>vcvtn_s64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtns-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtnq_s64_f64" type="checkbox"><label for="vcvtnq_s64_f64"><div>int64x2_t <b><b>vcvtnq_s64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtns-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtn_u64_f64" type="checkbox"><label for="vcvtn_u64_f64"><div>uint64x1_t <b><b>vcvtn_u64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtnu-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtnq_u64_f64" type="checkbox"><label for="vcvtnq_u64_f64"><div>uint64x2_t <b><b>vcvtnq_u64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtnu-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNU</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtm_s64_f64" type="checkbox"><label for="vcvtm_s64_f64"><div>int64x1_t <b><b>vcvtm_s64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to signed integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtms-vector-floating-point-convert-to-signed-integer-rounding-toward-minus-infinity-vector">FCVTMS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtmq_s64_f64" type="checkbox"><label for="vcvtmq_s64_f64"><div>int64x2_t <b><b>vcvtmq_s64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to signed integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtms-vector-floating-point-convert-to-signed-integer-rounding-toward-minus-infinity-vector">FCVTMS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtm_u64_f64" type="checkbox"><label for="vcvtm_u64_f64"><div>uint64x1_t <b><b>vcvtm_u64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtmu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-minus-infinity-vector">FCVTMU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtmq_u64_f64" type="checkbox"><label for="vcvtmq_u64_f64"><div>uint64x2_t <b><b>vcvtmq_u64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtmu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-minus-infinity-vector">FCVTMU</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtp_s64_f64" type="checkbox"><label for="vcvtp_s64_f64"><div>int64x1_t <b><b>vcvtp_s64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to signed integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtps-vector-floating-point-convert-to-signed-integer-rounding-toward-plus-infinity-vector">FCVTPS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtpq_s64_f64" type="checkbox"><label for="vcvtpq_s64_f64"><div>int64x2_t <b><b>vcvtpq_s64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to signed integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtps-vector-floating-point-convert-to-signed-integer-rounding-toward-plus-infinity-vector">FCVTPS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtp_u64_f64" type="checkbox"><label for="vcvtp_u64_f64"><div>uint64x1_t <b><b>vcvtp_u64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtpu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-plus-infinity-vector">FCVTPU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtpq_u64_f64" type="checkbox"><label for="vcvtpq_u64_f64"><div>uint64x2_t <b><b>vcvtpq_u64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtpu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-plus-infinity-vector">FCVTPU</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvta_s64_f64" type="checkbox"><label for="vcvta_s64_f64"><div>int64x1_t <b><b>vcvta_s64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtas-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtaq_s64_f64" type="checkbox"><label for="vcvtaq_s64_f64"><div>int64x2_t <b><b>vcvtaq_s64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtas-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvta_u64_f64" type="checkbox"><label for="vcvta_u64_f64"><div>uint64x1_t <b><b>vcvta_u64_f64</b></b> (float64x1_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtau-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtaq_u64_f64" type="checkbox"><label for="vcvtaq_u64_f64"><div>uint64x2_t <b><b>vcvtaq_u64_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtau-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAU</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_s64_f64" type="checkbox"><label for="vcvtd_s64_f64"><div>int64_t <b><b>vcvtd_s64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_u64_f64" type="checkbox"><label for="vcvtd_u64_f64"><div>uint64_t <b><b>vcvtd_u64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtnd_s64_f64" type="checkbox"><label for="vcvtnd_s64_f64"><div>int64_t <b><b>vcvtnd_s64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtns-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtnd_u64_f64" type="checkbox"><label for="vcvtnd_u64_f64"><div>uint64_t <b><b>vcvtnd_u64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtnu-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-even-vector">FCVTNU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtmd_s64_f64" type="checkbox"><label for="vcvtmd_s64_f64"><div>int64_t <b><b>vcvtmd_s64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to signed integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtms-vector-floating-point-convert-to-signed-integer-rounding-toward-minus-infinity-vector">FCVTMS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtmd_u64_f64" type="checkbox"><label for="vcvtmd_u64_f64"><div>uint64_t <b><b>vcvtmd_u64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtmu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-minus-infinity-vector">FCVTMU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtpd_s64_f64" type="checkbox"><label for="vcvtpd_s64_f64"><div>int64_t <b><b>vcvtpd_s64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to signed integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtps-vector-floating-point-convert-to-signed-integer-rounding-toward-plus-infinity-vector">FCVTPS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtpd_u64_f64" type="checkbox"><label for="vcvtpd_u64_f64"><div>uint64_t <b><b>vcvtpd_u64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to unsigned integer, rounding toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtpu-vector-floating-point-convert-to-unsigned-integer-rounding-toward-plus-infinity-vector">FCVTPU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtad_s64_f64" type="checkbox"><label for="vcvtad_s64_f64"><div>int64_t <b><b>vcvtad_s64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to signed integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtas-vector-floating-point-convert-to-signed-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtad_u64_f64" type="checkbox"><label for="vcvtad_u64_f64"><div>uint64_t <b><b>vcvtad_u64_f64</b></b> (float64_t a)<span class="right">Floating-point convert to unsigned integer, rounding to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtau-vector-floating-point-convert-to-unsigned-integer-rounding-to-nearest-with-ties-to-away-vector">FCVTAU</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_s32_f32" type="checkbox"><label for="vcvt_n_s32_f32"><div>int32x2_t <b><b>vcvt_n_s32_f32</b></b> (float32x2_t a, const int n)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_s32_f32" type="checkbox"><label for="vcvtq_n_s32_f32"><div>int32x4_t <b><b>vcvtq_n_s32_f32</b></b> (float32x4_t a, const int n)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_u32_f32" type="checkbox"><label for="vcvt_n_u32_f32"><div>uint32x2_t <b><b>vcvt_n_u32_f32</b></b> (float32x2_t a, const int n)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_u32_f32" type="checkbox"><label for="vcvtq_n_u32_f32"><div>uint32x4_t <b><b>vcvtq_n_u32_f32</b></b> (float32x4_t a, const int n)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_n_s32_f32" type="checkbox"><label for="vcvts_n_s32_f32"><div>int32_t <b><b>vcvts_n_s32_f32</b></b> (float32_t a, const int n)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_n_u32_f32" type="checkbox"><label for="vcvts_n_u32_f32"><div>uint32_t <b><b>vcvts_n_u32_f32</b></b> (float32_t a, const int n)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_s64_f64" type="checkbox"><label for="vcvt_n_s64_f64"><div>int64x1_t <b><b>vcvt_n_s64_f64</b></b> (float64x1_t a, const int n)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_s64_f64" type="checkbox"><label for="vcvtq_n_s64_f64"><div>int64x2_t <b><b>vcvtq_n_s64_f64</b></b> (float64x2_t a, const int n)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_u64_f64" type="checkbox"><label for="vcvt_n_u64_f64"><div>uint64x1_t <b><b>vcvt_n_u64_f64</b></b> (float64x1_t a, const int n)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_u64_f64" type="checkbox"><label for="vcvtq_n_u64_f64"><div>uint64x2_t <b><b>vcvtq_n_u64_f64</b></b> (float64x2_t a, const int n)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_n_s64_f64" type="checkbox"><label for="vcvtd_n_s64_f64"><div>int64_t <b><b>vcvtd_n_s64_f64</b></b> (float64_t a, const int n)<span class="right">Floating-point convert to signed integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzs-vector-integer-floating-point-convert-to-signed-integer-rounding-toward-zero-vector">FCVTZS</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_n_u64_f64" type="checkbox"><label for="vcvtd_n_u64_f64"><div>uint64_t <b><b>vcvtd_n_u64_f64</b></b> (float64_t a, const int n)<span class="right">Floating-point convert to unsigned integer, rounding toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtzu-vector-integer-floating-point-convert-to-unsigned-integer-rounding-toward-zero-vector">FCVTZU</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPToFixed.5" title="function: bits(M) FPToFixed(bits(N) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FPToFixed</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f32_s32" type="checkbox"><label for="vcvt_f32_s32"><div>float32x2_t <b><b>vcvt_f32_s32</b></b> (int32x2_t a)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_f32_s32" type="checkbox"><label for="vcvtq_f32_s32"><div>float32x4_t <b><b>vcvtq_f32_s32</b></b> (int32x4_t a)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f32_u32" type="checkbox"><label for="vcvt_f32_u32"><div>float32x2_t <b><b>vcvt_f32_u32</b></b> (uint32x2_t a)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_f32_u32" type="checkbox"><label for="vcvtq_f32_u32"><div>float32x4_t <b><b>vcvtq_f32_u32</b></b> (uint32x4_t a)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_f32_s32" type="checkbox"><label for="vcvts_f32_s32"><div>float32_t <b><b>vcvts_f32_s32</b></b> (int32_t a)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from a signed integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_f32_u32" type="checkbox"><label for="vcvts_f32_u32"><div>float32_t <b><b>vcvts_f32_u32</b></b> (uint32_t a)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f64_s64" type="checkbox"><label for="vcvt_f64_s64"><div>float64x1_t <b><b>vcvt_f64_s64</b></b> (int64x1_t a)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from a signed integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_f64_s64" type="checkbox"><label for="vcvtq_f64_s64"><div>float64x2_t <b><b>vcvtq_f64_s64</b></b> (int64x2_t a)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from a signed integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f64_u64" type="checkbox"><label for="vcvt_f64_u64"><div>float64x1_t <b><b>vcvt_f64_u64</b></b> (uint64x1_t a)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_f64_u64" type="checkbox"><label for="vcvtq_f64_u64"><div>float64x2_t <b><b>vcvtq_f64_u64</b></b> (uint64x2_t a)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_f64_s64" type="checkbox"><label for="vcvtd_f64_s64"><div>float64_t <b><b>vcvtd_f64_s64</b></b> (int64_t a)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_f64_u64" type="checkbox"><label for="vcvtd_f64_u64"><div>float64_t <b><b>vcvtd_f64_u64</b></b> (uint64_t a)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_f32_s32" type="checkbox"><label for="vcvt_n_f32_s32"><div>float32x2_t <b><b>vcvt_n_f32_s32</b></b> (int32x2_t a, const int n)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_f32_s32" type="checkbox"><label for="vcvtq_n_f32_s32"><div>float32x4_t <b><b>vcvtq_n_f32_s32</b></b> (int32x4_t a, const int n)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_f32_u32" type="checkbox"><label for="vcvt_n_f32_u32"><div>float32x2_t <b><b>vcvt_n_f32_u32</b></b> (uint32x2_t a, const int n)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Vd.2S,Vn.2S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_f32_u32" type="checkbox"><label for="vcvtq_n_f32_u32"><div>float32x4_t <b><b>vcvtq_n_f32_u32</b></b> (uint32x4_t a, const int n)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Vd.4S,Vn.4S,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_n_f32_s32" type="checkbox"><label for="vcvts_n_f32_s32"><div>float32_t <b><b>vcvts_n_f32_s32</b></b> (int32_t a, const int n)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvts_n_f32_u32" type="checkbox"><label for="vcvts_n_f32_u32"><div>float32_t <b><b>vcvts_n_f32_u32</b></b> (uint32_t a, const int n)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Sd,Sn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+1 &lt;= n &lt;= 32 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_f64_s64" type="checkbox"><label for="vcvt_n_f64_s64"><div>float64x1_t <b><b>vcvt_n_f64_s64</b></b> (int64x1_t a, const int n)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_f64_s64" type="checkbox"><label for="vcvtq_n_f64_s64"><div>float64x2_t <b><b>vcvtq_n_f64_s64</b></b> (int64x2_t a, const int n)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_n_f64_u64" type="checkbox"><label for="vcvt_n_f64_u64"><div>float64x1_t <b><b>vcvt_n_f64_u64</b></b> (uint64x1_t a, const int n)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtq_n_f64_u64" type="checkbox"><label for="vcvtq_n_f64_u64"><div>float64x2_t <b><b>vcvtq_n_f64_u64</b></b> (uint64x2_t a, const int n)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Vd.2D,Vn.2D,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_n_f64_s64" type="checkbox"><label for="vcvtd_n_f64_s64"><div>float64_t <b><b>vcvtd_n_f64_s64</b></b> (int64_t a, const int n)<span class="right">Signed integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/scvtf-vector-integer-signed-integer-convert-to-floating-point-vector">SCVTF</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtd_n_f64_u64" type="checkbox"><label for="vcvtd_n_f64_u64"><div>float64_t <b><b>vcvtd_n_f64_u64</b></b> (uint64_t a, const int n)<span class="right">Unsigned integer convert to floating-point</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ucvtf-vector-integer-unsigned-integer-convert-to-floating-point-vector">UCVTF</a> Dd,Dn,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+1 &lt;= n &lt;= 64 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding</a> rounding = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundingMode.1" title="function: FPRounding FPRoundingMode(FPCRType fpcr)">FPRoundingMode</a>(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FixedToFP.5" title="function: bits(N) FixedToFP(bits(M) op, integer fbits, boolean unsigned, FPCRType fpcr, FPRounding rounding)">FixedToFP</a>(element, 0, unsigned, FPCR, rounding);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f16_f32" type="checkbox"><label for="vcvt_f16_f32"><div>float16x4_t <b><b>vcvt_f16_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to lower precision narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&amp;FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtn-fcvtn2-floating-point-convert-to-lower-precision-narrow-vector">FCVTN</a> Vd.4H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_high_f16_f32" type="checkbox"><label for="vcvt_high_f16_f32"><div>float16x8_t <b><b>vcvt_high_f16_f32</b></b> (float16x4_t r, float32x4_t a)<span class="right">Floating-point convert to lower precision narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&amp;FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtn-fcvtn2-floating-point-convert-to-lower-precision-narrow-vector">FCVTN2</a> Vd.8H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f32_f64" type="checkbox"><label for="vcvt_f32_f64"><div>float32x2_t <b><b>vcvt_f32_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to lower precision narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&amp;FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtn-fcvtn2-floating-point-convert-to-lower-precision-narrow-vector">FCVTN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_high_f32_f64" type="checkbox"><label for="vcvt_high_f32_f64"><div>float32x4_t <b><b>vcvt_high_f32_f64</b></b> (float32x2_t r, float64x2_t a)<span class="right">Floating-point convert to lower precision narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&amp;FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtn-fcvtn2-floating-point-convert-to-lower-precision-narrow-vector">FCVTN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f32_f16" type="checkbox"><label for="vcvt_f32_f16"><div>float32x4_t <b><b>vcvt_f32_f16</b></b> (float16x4_t a)<span class="right">Floating-point convert to higher precision long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&amp;FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes each result to the equivalent element of the vector in the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtl-fcvtl2-floating-point-convert-to-higher-precision-long-vector">FCVTL</a> Vd.4S,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_high_f32_f16" type="checkbox"><label for="vcvt_high_f32_f16"><div>float32x4_t <b><b>vcvt_high_f32_f16</b></b> (float16x8_t a)<span class="right">Floating-point convert to higher precision long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&amp;FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes each result to the equivalent element of the vector in the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtl-fcvtl2-floating-point-convert-to-higher-precision-long-vector">FCVTL2</a> Vd.4S,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_f64_f32" type="checkbox"><label for="vcvt_f64_f32"><div>float64x2_t <b><b>vcvt_f64_f32</b></b> (float32x2_t a)<span class="right">Floating-point convert to higher precision long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&amp;FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes each result to the equivalent element of the vector in the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtl-fcvtl2-floating-point-convert-to-higher-precision-long-vector">FCVTL</a> Vd.2D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvt_high_f64_f32" type="checkbox"><label for="vcvt_high_f64_f32"><div>float64x2_t <b><b>vcvt_high_f64_f32</b></b> (float32x4_t a)<span class="right">Floating-point convert to higher precision long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&amp;FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes each result to the equivalent element of the vector in the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtl-fcvtl2-floating-point-convert-to-higher-precision-long-vector">FCVTL2</a> Vd.2D,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.2" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtx_f32_f64" type="checkbox"><label for="vcvtx_f32_f64"><div>float32x2_t <b><b>vcvtx_f32_f64</b></b> (float64x2_t a)<span class="right">Floating-point convert to lower precision narrow, rounding to odd</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&amp;FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtxn-fcvtxn2-floating-point-convert-to-lower-precision-narrow-rounding-to-odd-vector">FCVTXN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.3" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr, FPRounding rounding)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding_ODD" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding_ODD</a>);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtxd_f32_f64" type="checkbox"><label for="vcvtxd_f32_f64"><div>float32_t <b><b>vcvtxd_f32_f64</b></b> (float64_t a)<span class="right">Floating-point convert to lower precision narrow, rounding to odd</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&amp;FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtxn-fcvtxn2-floating-point-convert-to-lower-precision-narrow-rounding-to-odd-vector">FCVTXN</a> Sd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.3" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr, FPRounding rounding)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding_ODD" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding_ODD</a>);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcvtx_high_f32_f64" type="checkbox"><label for="vcvtx_high_f32_f64"><div>float32x4_t <b><b>vcvtx_high_f32_f64</b></b> (float32x2_t r, float64x2_t a)<span class="right">Floating-point convert to lower precision narrow, rounding to odd</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&amp;FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fcvtxn-fcvtxn2-floating-point-convert-to-lower-precision-narrow-rounding-to-odd-vector">FCVTXN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPConvert.3" title="function: bits(M) FPConvert(bits(N) op, FPCRType fpcr, FPRounding rounding)">FPConvert</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize], FPCR, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#FPRounding_ODD" title="enumeration FPRounding  {FPRounding_TIEEVEN, FPRounding_POSINF,
+ FPRounding_NEGINF,  FPRounding_ZERO,
+ FPRounding_TIEAWAY, FPRounding_ODD}">FPRounding_ODD</a>);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrnd_f32" type="checkbox"><label for="vrnd_f32"><div>float32x2_t <b><b>vrnd_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral, toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintz-vector-floating-point-round-to-integral-toward-zero-vector">FRINTZ</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndq_f32" type="checkbox"><label for="vrndq_f32"><div>float32x4_t <b><b>vrndq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral, toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintz-vector-floating-point-round-to-integral-toward-zero-vector">FRINTZ</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrnd_f64" type="checkbox"><label for="vrnd_f64"><div>float64x1_t <b><b>vrnd_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral, toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintz-vector-floating-point-round-to-integral-toward-zero-vector">FRINTZ</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndq_f64" type="checkbox"><label for="vrndq_f64"><div>float64x2_t <b><b>vrndq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral, toward zero</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintz-vector-floating-point-round-to-integral-toward-zero-vector">FRINTZ</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndn_f32" type="checkbox"><label for="vrndn_f32"><div>float32x2_t <b><b>vrndn_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral, to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintn-vector-floating-point-round-to-integral-to-nearest-with-ties-to-even-vector">FRINTN</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndnq_f32" type="checkbox"><label for="vrndnq_f32"><div>float32x4_t <b><b>vrndnq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral, to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintn-vector-floating-point-round-to-integral-to-nearest-with-ties-to-even-vector">FRINTN</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndn_f64" type="checkbox"><label for="vrndn_f64"><div>float64x1_t <b><b>vrndn_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral, to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintn-vector-floating-point-round-to-integral-to-nearest-with-ties-to-even-vector">FRINTN</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndnq_f64" type="checkbox"><label for="vrndnq_f64"><div>float64x2_t <b><b>vrndnq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral, to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintn-vector-floating-point-round-to-integral-to-nearest-with-ties-to-even-vector">FRINTN</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndns_f32" type="checkbox"><label for="vrndns_f32"><div>float32_t <b><b>vrndns_f32</b></b> (float32_t a)<span class="right">Floating-point round to integral, to nearest with ties to even</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to even (scalar). This instruction rounds a floating-point value in the SIMD&amp;FP source register to an integral floating-point value of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintn-vector-floating-point-round-to-integral-to-nearest-with-ties-to-even-vector">FRINTN</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndm_f32" type="checkbox"><label for="vrndm_f32"><div>float32x2_t <b><b>vrndm_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral, toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintm-vector-floating-point-round-to-integral-toward-minus-infinity-vector">FRINTM</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndmq_f32" type="checkbox"><label for="vrndmq_f32"><div>float32x4_t <b><b>vrndmq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral, toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintm-vector-floating-point-round-to-integral-toward-minus-infinity-vector">FRINTM</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndm_f64" type="checkbox"><label for="vrndm_f64"><div>float64x1_t <b><b>vrndm_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral, toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintm-vector-floating-point-round-to-integral-toward-minus-infinity-vector">FRINTM</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndmq_f64" type="checkbox"><label for="vrndmq_f64"><div>float64x2_t <b><b>vrndmq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral, toward minus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintm-vector-floating-point-round-to-integral-toward-minus-infinity-vector">FRINTM</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndp_f32" type="checkbox"><label for="vrndp_f32"><div>float32x2_t <b><b>vrndp_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral, toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintp-vector-floating-point-round-to-integral-toward-plus-infinity-vector">FRINTP</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndpq_f32" type="checkbox"><label for="vrndpq_f32"><div>float32x4_t <b><b>vrndpq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral, toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintp-vector-floating-point-round-to-integral-toward-plus-infinity-vector">FRINTP</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndp_f64" type="checkbox"><label for="vrndp_f64"><div>float64x1_t <b><b>vrndp_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral, toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintp-vector-floating-point-round-to-integral-toward-plus-infinity-vector">FRINTP</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndpq_f64" type="checkbox"><label for="vrndpq_f64"><div>float64x2_t <b><b>vrndpq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral, toward plus infinity</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintp-vector-floating-point-round-to-integral-toward-plus-infinity-vector">FRINTP</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrnda_f32" type="checkbox"><label for="vrnda_f32"><div>float32x2_t <b><b>vrnda_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral, to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinta-vector-floating-point-round-to-integral-to-nearest-with-ties-to-away-vector">FRINTA</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndaq_f32" type="checkbox"><label for="vrndaq_f32"><div>float32x4_t <b><b>vrndaq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral, to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinta-vector-floating-point-round-to-integral-to-nearest-with-ties-to-away-vector">FRINTA</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrnda_f64" type="checkbox"><label for="vrnda_f64"><div>float64x1_t <b><b>vrnda_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral, to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinta-vector-floating-point-round-to-integral-to-nearest-with-ties-to-away-vector">FRINTA</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndaq_f64" type="checkbox"><label for="vrndaq_f64"><div>float64x2_t <b><b>vrndaq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral, to nearest with ties to away</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinta-vector-floating-point-round-to-integral-to-nearest-with-ties-to-away-vector">FRINTA</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndi_f32" type="checkbox"><label for="vrndi_f32"><div>float32x2_t <b><b>vrndi_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinti-vector-floating-point-round-to-integral-using-current-rounding-mode-vector">FRINTI</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndiq_f32" type="checkbox"><label for="vrndiq_f32"><div>float32x4_t <b><b>vrndiq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinti-vector-floating-point-round-to-integral-using-current-rounding-mode-vector">FRINTI</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndi_f64" type="checkbox"><label for="vrndi_f64"><div>float64x1_t <b><b>vrndi_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinti-vector-floating-point-round-to-integral-using-current-rounding-mode-vector">FRINTI</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndiq_f64" type="checkbox"><label for="vrndiq_f64"><div>float64x2_t <b><b>vrndiq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frinti-vector-floating-point-round-to-integral-using-current-rounding-mode-vector">FRINTI</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndx_f32" type="checkbox"><label for="vrndx_f32"><div>float32x2_t <b><b>vrndx_f32</b></b> (float32x2_t a)<span class="right">Floating-point round to integral exact, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintx-vector-floating-point-round-to-integral-exact-using-current-rounding-mode-vector">FRINTX</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndxq_f32" type="checkbox"><label for="vrndxq_f32"><div>float32x4_t <b><b>vrndxq_f32</b></b> (float32x4_t a)<span class="right">Floating-point round to integral exact, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintx-vector-floating-point-round-to-integral-exact-using-current-rounding-mode-vector">FRINTX</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrndx_f64" type="checkbox"><label for="vrndx_f64"><div>float64x1_t <b><b>vrndx_f64</b></b> (float64x1_t a)<span class="right">Floating-point round to integral exact, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintx-vector-floating-point-round-to-integral-exact-using-current-rounding-mode-vector">FRINTX</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrndxq_f64" type="checkbox"><label for="vrndxq_f64"><div>float64x2_t <b><b>vrndxq_f64</b></b> (float64x2_t a)<span class="right">Floating-point round to integral exact, using current rounding mode</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&amp;FP source register to integral floating-point values of the same size using the rounding mode that is determined by the <a class="armarm-xref" title="Reference to ARM ARM section">FPCR</a>, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frintx-vector-floating-point-round-to-integral-exact-using-current-rounding-mode-vector">FRINTX</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRoundInt.4" title="function: bits(N) FPRoundInt(bits(N) op, FPCRType fpcr, FPRounding rounding, boolean exact)">FPRoundInt</a>(element, FPCR, rounding, exact);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_s16" type="checkbox"><label for="vmovn_s16"><div>int8x8_t <b><b>vmovn_s16</b></b> (int16x8_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN</a> Vd.8B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_s32" type="checkbox"><label for="vmovn_s32"><div>int16x4_t <b><b>vmovn_s32</b></b> (int32x4_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN</a> Vd.4H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_s64" type="checkbox"><label for="vmovn_s64"><div>int32x2_t <b><b>vmovn_s64</b></b> (int64x2_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_u16" type="checkbox"><label for="vmovn_u16"><div>uint8x8_t <b><b>vmovn_u16</b></b> (uint16x8_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN</a> Vd.8B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_u32" type="checkbox"><label for="vmovn_u32"><div>uint16x4_t <b><b>vmovn_u32</b></b> (uint32x4_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN</a> Vd.4H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_u64" type="checkbox"><label for="vmovn_u64"><div>uint32x2_t <b><b>vmovn_u64</b></b> (uint64x2_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_high_s16" type="checkbox"><label for="vmovn_high_s16"><div>int8x16_t <b><b>vmovn_high_s16</b></b> (int8x8_t r, int16x8_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN2</a> Vd.16B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_high_s32" type="checkbox"><label for="vmovn_high_s32"><div>int16x8_t <b><b>vmovn_high_s32</b></b> (int16x4_t r, int32x4_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN2</a> Vd.8H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_high_s64" type="checkbox"><label for="vmovn_high_s64"><div>int32x4_t <b><b>vmovn_high_s64</b></b> (int32x2_t r, int64x2_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_high_u16" type="checkbox"><label for="vmovn_high_u16"><div>uint8x16_t <b><b>vmovn_high_u16</b></b> (uint8x8_t r, uint16x8_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN2</a> Vd.16B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_high_u32" type="checkbox"><label for="vmovn_high_u32"><div>uint16x8_t <b><b>vmovn_high_u32</b></b> (uint16x4_t r, uint32x4_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN2</a> Vd.8H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovn_high_u64" type="checkbox"><label for="vmovn_high_u64"><div>uint32x4_t <b><b>vmovn_high_u64</b></b> (uint32x2_t r, uint64x2_t a)<span class="right">Extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/xtn-xtn2-extract-narrow">XTN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_s8" type="checkbox"><label for="vmovl_s8"><div>int16x8_t <b><b>vmovl_s8</b></b> (int8x8_t a)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL</a> Vd.8H,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_s16" type="checkbox"><label for="vmovl_s16"><div>int32x4_t <b><b>vmovl_s16</b></b> (int16x4_t a)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL</a> Vd.4S,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_s32" type="checkbox"><label for="vmovl_s32"><div>int64x2_t <b><b>vmovl_s32</b></b> (int32x2_t a)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL</a> Vd.2D,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_u8" type="checkbox"><label for="vmovl_u8"><div>uint16x8_t <b><b>vmovl_u8</b></b> (uint8x8_t a)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL</a> Vd.8H,Vn.8B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_u16" type="checkbox"><label for="vmovl_u16"><div>uint32x4_t <b><b>vmovl_u16</b></b> (uint16x4_t a)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL</a> Vd.4S,Vn.4H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_u32" type="checkbox"><label for="vmovl_u32"><div>uint64x2_t <b><b>vmovl_u32</b></b> (uint32x2_t a)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL</a> Vd.2D,Vn.2S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_high_s8" type="checkbox"><label for="vmovl_high_s8"><div>int16x8_t <b><b>vmovl_high_s8</b></b> (int8x16_t a)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL2</a> Vd.8H,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_high_s16" type="checkbox"><label for="vmovl_high_s16"><div>int32x4_t <b><b>vmovl_high_s16</b></b> (int16x8_t a)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL2</a> Vd.4S,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_high_s32" type="checkbox"><label for="vmovl_high_s32"><div>int64x2_t <b><b>vmovl_high_s32</b></b> (int32x4_t a)<span class="right">Signed shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&amp;FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sshll-sshll2-signed-shift-left-long-immediate">SSHLL2</a> Vd.2D,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_high_u8" type="checkbox"><label for="vmovl_high_u8"><div>uint16x8_t <b><b>vmovl_high_u8</b></b> (uint8x16_t a)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL2</a> Vd.8H,Vn.16B,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_high_u16" type="checkbox"><label for="vmovl_high_u16"><div>uint32x4_t <b><b>vmovl_high_u16</b></b> (uint16x8_t a)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL2</a> Vd.4S,Vn.8H,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovl_high_u32" type="checkbox"><label for="vmovl_high_u32"><div>uint64x2_t <b><b>vmovl_high_u32</b></b> (uint32x4_t a)<span class="right">Unsigned shift left long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&amp;FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ushll-ushll2-unsigned-shift-left-long-immediate">USHLL2</a> Vd.2D,Vn.4S,#0
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned) &lt;&lt; shift;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = element&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_s16" type="checkbox"><label for="vqmovn_s16"><div>int8x8_t <b><b>vqmovn_s16</b></b> (int16x8_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN</a> Vd.8B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_s32" type="checkbox"><label for="vqmovn_s32"><div>int16x4_t <b><b>vqmovn_s32</b></b> (int32x4_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN</a> Vd.4H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_s64" type="checkbox"><label for="vqmovn_s64"><div>int32x2_t <b><b>vqmovn_s64</b></b> (int64x2_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_u16" type="checkbox"><label for="vqmovn_u16"><div>uint8x8_t <b><b>vqmovn_u16</b></b> (uint16x8_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN</a> Vd.8B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_u32" type="checkbox"><label for="vqmovn_u32"><div>uint16x4_t <b><b>vqmovn_u32</b></b> (uint32x4_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN</a> Vd.4H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_u64" type="checkbox"><label for="vqmovn_u64"><div>uint32x2_t <b><b>vqmovn_u64</b></b> (uint64x2_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovnh_s16" type="checkbox"><label for="vqmovnh_s16"><div>int8_t <b><b>vqmovnh_s16</b></b> (int16_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN</a> Bd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovns_s32" type="checkbox"><label for="vqmovns_s32"><div>int16_t <b><b>vqmovns_s32</b></b> (int32_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN</a> Hd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovnd_s64" type="checkbox"><label for="vqmovnd_s64"><div>int32_t <b><b>vqmovnd_s64</b></b> (int64_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN</a> Sd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovnh_u16" type="checkbox"><label for="vqmovnh_u16"><div>uint8_t <b><b>vqmovnh_u16</b></b> (uint16_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN</a> Bd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovns_u32" type="checkbox"><label for="vqmovns_u32"><div>uint16_t <b><b>vqmovns_u32</b></b> (uint32_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN</a> Hd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovnd_u64" type="checkbox"><label for="vqmovnd_u64"><div>uint32_t <b><b>vqmovnd_u64</b></b> (uint64_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN</a> Sd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_high_s16" type="checkbox"><label for="vqmovn_high_s16"><div>int8x16_t <b><b>vqmovn_high_s16</b></b> (int8x8_t r, int16x8_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN2</a> Vd.16B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_high_s32" type="checkbox"><label for="vqmovn_high_s32"><div>int16x8_t <b><b>vqmovn_high_s32</b></b> (int16x4_t r, int32x4_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN2</a> Vd.8H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_high_s64" type="checkbox"><label for="vqmovn_high_s64"><div>int32x4_t <b><b>vqmovn_high_s64</b></b> (int32x2_t r, int64x2_t a)<span class="right">Signed saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtn-sqxtn2-signed-saturating-extract-narrow">SQXTN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_high_u16" type="checkbox"><label for="vqmovn_high_u16"><div>uint8x16_t <b><b>vqmovn_high_u16</b></b> (uint8x8_t r, uint16x8_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN2</a> Vd.16B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_high_u32" type="checkbox"><label for="vqmovn_high_u32"><div>uint16x8_t <b><b>vqmovn_high_u32</b></b> (uint16x4_t r, uint32x4_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN2</a> Vd.8H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovn_high_u64" type="checkbox"><label for="vqmovn_high_u64"><div>uint32x4_t <b><b>vqmovn_high_u64</b></b> (uint32x2_t r, uint64x2_t a)<span class="right">Unsigned saturating extract narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&amp;FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uqxtn-uqxtn2-unsigned-saturating-extract-narrow">UQXTN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SatQ.3" title="function: (bits(N), boolean) SatQ(integer i, integer N, boolean unsigned)">SatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovun_s16" type="checkbox"><label for="vqmovun_s16"><div>uint8x8_t <b><b>vqmovun_s16</b></b> (int16x8_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN</a> Vd.8B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovun_s32" type="checkbox"><label for="vqmovun_s32"><div>uint16x4_t <b><b>vqmovun_s32</b></b> (int32x4_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN</a> Vd.4H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovun_s64" type="checkbox"><label for="vqmovun_s64"><div>uint32x2_t <b><b>vqmovun_s64</b></b> (int64x2_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN</a> Vd.2S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqmovunh_s16" type="checkbox"><label for="vqmovunh_s16"><div>uint8_t <b><b>vqmovunh_s16</b></b> (int16_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN</a> Bd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovuns_s32" type="checkbox"><label for="vqmovuns_s32"><div>uint16_t <b><b>vqmovuns_s32</b></b> (int32_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN</a> Hd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovund_s64" type="checkbox"><label for="vqmovund_s64"><div>uint32_t <b><b>vqmovund_s64</b></b> (int64_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN</a> Sd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovun_high_s16" type="checkbox"><label for="vqmovun_high_s16"><div>uint8x16_t <b><b>vqmovun_high_s16</b></b> (uint8x8_t r, int16x8_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN2</a> Vd.16B,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.8B <br />
+a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovun_high_s32" type="checkbox"><label for="vqmovun_high_s32"><div>uint16x8_t <b><b>vqmovun_high_s32</b></b> (uint16x4_t r, int32x4_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN2</a> Vd.8H,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.4H <br />
+a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqmovun_high_s64" type="checkbox"><label for="vqmovun_high_s64"><div>uint32x4_t <b><b>vqmovun_high_s64</b></b> (uint32x2_t r, int64x2_t a)<span class="right">Signed saturating extract unsigned narrow</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&amp;FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&amp;FP register. The destination vector elements are half as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqxtun-sqxtun2-signed-saturating-extract-unsigned-narrow">SQXTUN2</a> Vd.4S,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>r &rarr; Vd.2S <br />
+a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(2*datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 2*esize];
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedSatQ.2" title="function: (bits(N), boolean) UnsignedSatQ(integer i, integer N)">UnsignedSatQ</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(element), esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.write.2" title="accessor: Vpart[integer n, integer part] = bits(width) value">Vpart</a>[d, part] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_lane_s16" type="checkbox"><label for="vmla_lane_s16"><div>int16x4_t <b><b>vmla_lane_s16</b></b> (int16x4_t a, int16x4_t b, int16x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_lane_s16" type="checkbox"><label for="vmlaq_lane_s16"><div>int16x8_t <b><b>vmlaq_lane_s16</b></b> (int16x8_t a, int16x8_t b, int16x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_lane_s32" type="checkbox"><label for="vmla_lane_s32"><div>int32x2_t <b><b>vmla_lane_s32</b></b> (int32x2_t a, int32x2_t b, int32x2_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_lane_s32" type="checkbox"><label for="vmlaq_lane_s32"><div>int32x4_t <b><b>vmlaq_lane_s32</b></b> (int32x4_t a, int32x4_t b, int32x2_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_lane_u16" type="checkbox"><label for="vmla_lane_u16"><div>uint16x4_t <b><b>vmla_lane_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_lane_u16" type="checkbox"><label for="vmlaq_lane_u16"><div>uint16x8_t <b><b>vmlaq_lane_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_lane_u32" type="checkbox"><label for="vmla_lane_u32"><div>uint32x2_t <b><b>vmla_lane_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x2_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_lane_u32" type="checkbox"><label for="vmlaq_lane_u32"><div>uint32x4_t <b><b>vmlaq_lane_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x2_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_lane_f32" type="checkbox"><label for="vmla_lane_f32"><div>float32x2_t <b><b>vmla_lane_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t v, const int lane)<span class="right">Undefined</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * v[lane]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_lane_f32" type="checkbox"><label for="vmlaq_lane_f32"><div>float32x4_t <b><b>vmlaq_lane_f32</b></b> (float32x4_t a, float32x4_t b, float32x2_t v, const int lane)<span class="right">Undefined</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * v[lane]) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_laneq_s16" type="checkbox"><label for="vmla_laneq_s16"><div>int16x4_t <b><b>vmla_laneq_s16</b></b> (int16x4_t a, int16x4_t b, int16x8_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_laneq_s16" type="checkbox"><label for="vmlaq_laneq_s16"><div>int16x8_t <b><b>vmlaq_laneq_s16</b></b> (int16x8_t a, int16x8_t b, int16x8_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_laneq_s32" type="checkbox"><label for="vmla_laneq_s32"><div>int32x2_t <b><b>vmla_laneq_s32</b></b> (int32x2_t a, int32x2_t b, int32x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_laneq_s32" type="checkbox"><label for="vmlaq_laneq_s32"><div>int32x4_t <b><b>vmlaq_laneq_s32</b></b> (int32x4_t a, int32x4_t b, int32x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_laneq_u16" type="checkbox"><label for="vmla_laneq_u16"><div>uint16x4_t <b><b>vmla_laneq_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x8_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_laneq_u16" type="checkbox"><label for="vmlaq_laneq_u16"><div>uint16x8_t <b><b>vmlaq_laneq_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x8_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_laneq_u32" type="checkbox"><label for="vmla_laneq_u32"><div>uint32x2_t <b><b>vmla_laneq_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_laneq_u32" type="checkbox"><label for="vmlaq_laneq_u32"><div>uint32x4_t <b><b>vmlaq_laneq_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x4_t v, const int lane)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_laneq_f32" type="checkbox"><label for="vmla_laneq_f32"><div>float32x2_t <b><b>vmla_laneq_f32</b></b> (float32x2_t a, float32x2_t b, float32x4_t v, const int lane)<span class="right">Undefined</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * v[lane]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_laneq_f32" type="checkbox"><label for="vmlaq_laneq_f32"><div>float32x4_t <b><b>vmlaq_laneq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t v, const int lane)<span class="right">Undefined</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] + (b[i] * v[lane]) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_lane_s16" type="checkbox"><label for="vmlal_lane_s16"><div>int32x4_t <b><b>vmlal_lane_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_lane_s32" type="checkbox"><label for="vmlal_lane_s32"><div>int64x2_t <b><b>vmlal_lane_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_lane_u16" type="checkbox"><label for="vmlal_lane_u16"><div>uint32x4_t <b><b>vmlal_lane_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x4_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_lane_u32" type="checkbox"><label for="vmlal_lane_u32"><div>uint64x2_t <b><b>vmlal_lane_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x2_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_lane_s16" type="checkbox"><label for="vmlal_high_lane_s16"><div>int32x4_t <b><b>vmlal_high_lane_s16</b></b> (int32x4_t a, int16x8_t b, int16x4_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_lane_s32" type="checkbox"><label for="vmlal_high_lane_s32"><div>int64x2_t <b><b>vmlal_high_lane_s32</b></b> (int64x2_t a, int32x4_t b, int32x2_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_lane_u16" type="checkbox"><label for="vmlal_high_lane_u16"><div>uint32x4_t <b><b>vmlal_high_lane_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x4_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_lane_u32" type="checkbox"><label for="vmlal_high_lane_u32"><div>uint64x2_t <b><b>vmlal_high_lane_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x2_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_laneq_s16" type="checkbox"><label for="vmlal_laneq_s16"><div>int32x4_t <b><b>vmlal_laneq_s16</b></b> (int32x4_t a, int16x4_t b, int16x8_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_laneq_s32" type="checkbox"><label for="vmlal_laneq_s32"><div>int64x2_t <b><b>vmlal_laneq_s32</b></b> (int64x2_t a, int32x2_t b, int32x4_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_laneq_u16" type="checkbox"><label for="vmlal_laneq_u16"><div>uint32x4_t <b><b>vmlal_laneq_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x8_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_laneq_u32" type="checkbox"><label for="vmlal_laneq_u32"><div>uint64x2_t <b><b>vmlal_laneq_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x4_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_laneq_s16" type="checkbox"><label for="vmlal_high_laneq_s16"><div>int32x4_t <b><b>vmlal_high_laneq_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_laneq_s32" type="checkbox"><label for="vmlal_high_laneq_s32"><div>int64x2_t <b><b>vmlal_high_laneq_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t v, const int lane)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_laneq_u16" type="checkbox"><label for="vmlal_high_laneq_u16"><div>uint32x4_t <b><b>vmlal_high_laneq_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x8_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_laneq_u32" type="checkbox"><label for="vmlal_high_laneq_u32"><div>uint64x2_t <b><b>vmlal_high_laneq_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x4_t v, const int lane)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_lane_s16" type="checkbox"><label for="vqdmlal_lane_s16"><div>int32x4_t <b><b>vqdmlal_lane_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_lane_s32" type="checkbox"><label for="vqdmlal_lane_s32"><div>int64x2_t <b><b>vqdmlal_lane_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlalh_lane_s16" type="checkbox"><label for="vqdmlalh_lane_s16"><div>int32_t <b><b>vqdmlalh_lane_s16</b></b> (int32_t a, int16_t b, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Sd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Hn <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlals_lane_s32" type="checkbox"><label for="vqdmlals_lane_s32"><div>int64_t <b><b>vqdmlals_lane_s32</b></b> (int64_t a, int32_t b, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Dd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_lane_s16" type="checkbox"><label for="vqdmlal_high_lane_s16"><div>int32x4_t <b><b>vqdmlal_high_lane_s16</b></b> (int32x4_t a, int16x8_t b, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_lane_s32" type="checkbox"><label for="vqdmlal_high_lane_s32"><div>int64x2_t <b><b>vqdmlal_high_lane_s32</b></b> (int64x2_t a, int32x4_t b, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_laneq_s16" type="checkbox"><label for="vqdmlal_laneq_s16"><div>int32x4_t <b><b>vqdmlal_laneq_s16</b></b> (int32x4_t a, int16x4_t b, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_laneq_s32" type="checkbox"><label for="vqdmlal_laneq_s32"><div>int64x2_t <b><b>vqdmlal_laneq_s32</b></b> (int64x2_t a, int32x2_t b, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlalh_laneq_s16" type="checkbox"><label for="vqdmlalh_laneq_s16"><div>int32_t <b><b>vqdmlalh_laneq_s16</b></b> (int32_t a, int16_t b, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Sd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Hn <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlals_laneq_s32" type="checkbox"><label for="vqdmlals_laneq_s32"><div>int64_t <b><b>vqdmlals_laneq_s32</b></b> (int64_t a, int32_t b, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Dd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_laneq_s16" type="checkbox"><label for="vqdmlal_high_laneq_s16"><div>int32x4_t <b><b>vqdmlal_high_laneq_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_laneq_s32" type="checkbox"><label for="vqdmlal_high_laneq_s32"><div>int64x2_t <b><b>vqdmlal_high_laneq_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_lane_s16" type="checkbox"><label for="vmls_lane_s16"><div>int16x4_t <b><b>vmls_lane_s16</b></b> (int16x4_t a, int16x4_t b, int16x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_lane_s16" type="checkbox"><label for="vmlsq_lane_s16"><div>int16x8_t <b><b>vmlsq_lane_s16</b></b> (int16x8_t a, int16x8_t b, int16x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_lane_s32" type="checkbox"><label for="vmls_lane_s32"><div>int32x2_t <b><b>vmls_lane_s32</b></b> (int32x2_t a, int32x2_t b, int32x2_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_lane_s32" type="checkbox"><label for="vmlsq_lane_s32"><div>int32x4_t <b><b>vmlsq_lane_s32</b></b> (int32x4_t a, int32x4_t b, int32x2_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_lane_u16" type="checkbox"><label for="vmls_lane_u16"><div>uint16x4_t <b><b>vmls_lane_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_lane_u16" type="checkbox"><label for="vmlsq_lane_u16"><div>uint16x8_t <b><b>vmlsq_lane_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_lane_u32" type="checkbox"><label for="vmls_lane_u32"><div>uint32x2_t <b><b>vmls_lane_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x2_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_lane_u32" type="checkbox"><label for="vmlsq_lane_u32"><div>uint32x4_t <b><b>vmlsq_lane_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x2_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_lane_f32" type="checkbox"><label for="vmls_lane_f32"><div>float32x2_t <b><b>vmls_lane_f32</b></b> (float32x2_t a, float32x2_t b, float32x2_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] - (b[i] * v[lane]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_lane_f32" type="checkbox"><label for="vmlsq_lane_f32"><div>float32x4_t <b><b>vmlsq_lane_f32</b></b> (float32x4_t a, float32x4_t b, float32x2_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[I] = a[i] - (b[i] * v[lane]) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_laneq_s16" type="checkbox"><label for="vmls_laneq_s16"><div>int16x4_t <b><b>vmls_laneq_s16</b></b> (int16x4_t a, int16x4_t b, int16x8_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_laneq_s16" type="checkbox"><label for="vmlsq_laneq_s16"><div>int16x8_t <b><b>vmlsq_laneq_s16</b></b> (int16x8_t a, int16x8_t b, int16x8_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_laneq_s32" type="checkbox"><label for="vmls_laneq_s32"><div>int32x2_t <b><b>vmls_laneq_s32</b></b> (int32x2_t a, int32x2_t b, int32x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_laneq_s32" type="checkbox"><label for="vmlsq_laneq_s32"><div>int32x4_t <b><b>vmlsq_laneq_s32</b></b> (int32x4_t a, int32x4_t b, int32x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_laneq_u16" type="checkbox"><label for="vmls_laneq_u16"><div>uint16x4_t <b><b>vmls_laneq_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x8_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_laneq_u16" type="checkbox"><label for="vmlsq_laneq_u16"><div>uint16x8_t <b><b>vmlsq_laneq_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x8_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_laneq_u32" type="checkbox"><label for="vmls_laneq_u32"><div>uint32x2_t <b><b>vmls_laneq_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_laneq_u32" type="checkbox"><label for="vmlsq_laneq_u32"><div>uint32x4_t <b><b>vmlsq_laneq_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_laneq_f32" type="checkbox"><label for="vmls_laneq_f32"><div>float32x2_t <b><b>vmls_laneq_f32</b></b> (float32x2_t a, float32x2_t b, float32x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_laneq_f32" type="checkbox"><label for="vmlsq_laneq_f32"><div>float32x4_t <b><b>vmlsq_laneq_f32</b></b> (float32x4_t a, float32x4_t b, float32x4_t v, const int lane)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_lane_s16" type="checkbox"><label for="vmlsl_lane_s16"><div>int32x4_t <b><b>vmlsl_lane_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_lane_s32" type="checkbox"><label for="vmlsl_lane_s32"><div>int64x2_t <b><b>vmlsl_lane_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_lane_u16" type="checkbox"><label for="vmlsl_lane_u16"><div>uint32x4_t <b><b>vmlsl_lane_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x4_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_lane_u32" type="checkbox"><label for="vmlsl_lane_u32"><div>uint64x2_t <b><b>vmlsl_lane_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x2_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_lane_s16" type="checkbox"><label for="vmlsl_high_lane_s16"><div>int32x4_t <b><b>vmlsl_high_lane_s16</b></b> (int32x4_t a, int16x8_t b, int16x4_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_lane_s32" type="checkbox"><label for="vmlsl_high_lane_s32"><div>int64x2_t <b><b>vmlsl_high_lane_s32</b></b> (int64x2_t a, int32x4_t b, int32x2_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_lane_u16" type="checkbox"><label for="vmlsl_high_lane_u16"><div>uint32x4_t <b><b>vmlsl_high_lane_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x4_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_lane_u32" type="checkbox"><label for="vmlsl_high_lane_u32"><div>uint64x2_t <b><b>vmlsl_high_lane_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x2_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_laneq_s16" type="checkbox"><label for="vmlsl_laneq_s16"><div>int32x4_t <b><b>vmlsl_laneq_s16</b></b> (int32x4_t a, int16x4_t b, int16x8_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_laneq_s32" type="checkbox"><label for="vmlsl_laneq_s32"><div>int64x2_t <b><b>vmlsl_laneq_s32</b></b> (int64x2_t a, int32x2_t b, int32x4_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_laneq_u16" type="checkbox"><label for="vmlsl_laneq_u16"><div>uint32x4_t <b><b>vmlsl_laneq_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16x8_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_laneq_u32" type="checkbox"><label for="vmlsl_laneq_u32"><div>uint64x2_t <b><b>vmlsl_laneq_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32x4_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_laneq_s16" type="checkbox"><label for="vmlsl_high_laneq_s16"><div>int32x4_t <b><b>vmlsl_high_laneq_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_laneq_s32" type="checkbox"><label for="vmlsl_high_laneq_s32"><div>int64x2_t <b><b>vmlsl_high_laneq_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t v, const int lane)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_laneq_u16" type="checkbox"><label for="vmlsl_high_laneq_u16"><div>uint32x4_t <b><b>vmlsl_high_laneq_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16x8_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_laneq_u32" type="checkbox"><label for="vmlsl_high_laneq_u32"><div>uint64x2_t <b><b>vmlsl_high_laneq_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32x4_t v, const int lane)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_lane_s16" type="checkbox"><label for="vqdmlsl_lane_s16"><div>int32x4_t <b><b>vqdmlsl_lane_s16</b></b> (int32x4_t a, int16x4_t b, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_lane_s32" type="checkbox"><label for="vqdmlsl_lane_s32"><div>int64x2_t <b><b>vqdmlsl_lane_s32</b></b> (int64x2_t a, int32x2_t b, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlslh_lane_s16" type="checkbox"><label for="vqdmlslh_lane_s16"><div>int32_t <b><b>vqdmlslh_lane_s16</b></b> (int32_t a, int16_t b, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Sd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Hn <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsls_lane_s32" type="checkbox"><label for="vqdmlsls_lane_s32"><div>int64_t <b><b>vqdmlsls_lane_s32</b></b> (int64_t a, int32_t b, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Dd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_lane_s16" type="checkbox"><label for="vqdmlsl_high_lane_s16"><div>int32x4_t <b><b>vqdmlsl_high_lane_s16</b></b> (int32x4_t a, int16x8_t b, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_lane_s32" type="checkbox"><label for="vqdmlsl_high_lane_s32"><div>int64x2_t <b><b>vqdmlsl_high_lane_s32</b></b> (int64x2_t a, int32x4_t b, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_laneq_s16" type="checkbox"><label for="vqdmlsl_laneq_s16"><div>int32x4_t <b><b>vqdmlsl_laneq_s16</b></b> (int32x4_t a, int16x4_t b, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_laneq_s32" type="checkbox"><label for="vqdmlsl_laneq_s32"><div>int64x2_t <b><b>vqdmlsl_laneq_s32</b></b> (int64x2_t a, int32x2_t b, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlslh_laneq_s16" type="checkbox"><label for="vqdmlslh_laneq_s16"><div>int32_t <b><b>vqdmlslh_laneq_s16</b></b> (int32_t a, int16_t b, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Sd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sd <br />
+b &rarr; Hn <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsls_laneq_s32" type="checkbox"><label for="vqdmlsls_laneq_s32"><div>int64_t <b><b>vqdmlsls_laneq_s32</b></b> (int64_t a, int32_t b, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Dd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dd <br />
+b &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_laneq_s16" type="checkbox"><label for="vqdmlsl_high_laneq_s16"><div>int32x4_t <b><b>vqdmlsl_high_laneq_s16</b></b> (int32x4_t a, int16x8_t b, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_laneq_s32" type="checkbox"><label for="vqdmlsl_high_laneq_s32"><div>int64x2_t <b><b>vqdmlsl_high_laneq_s32</b></b> (int64x2_t a, int32x4_t b, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_n_s16" type="checkbox"><label for="vmul_n_s16"><div>int16x4_t <b><b>vmul_n_s16</b></b> (int16x4_t a, int16_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_n_s16" type="checkbox"><label for="vmulq_n_s16"><div>int16x8_t <b><b>vmulq_n_s16</b></b> (int16x8_t a, int16_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_n_s32" type="checkbox"><label for="vmul_n_s32"><div>int32x2_t <b><b>vmul_n_s32</b></b> (int32x2_t a, int32_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_n_s32" type="checkbox"><label for="vmulq_n_s32"><div>int32x4_t <b><b>vmulq_n_s32</b></b> (int32x4_t a, int32_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_n_u16" type="checkbox"><label for="vmul_n_u16"><div>uint16x4_t <b><b>vmul_n_u16</b></b> (uint16x4_t a, uint16_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_n_u16" type="checkbox"><label for="vmulq_n_u16"><div>uint16x8_t <b><b>vmulq_n_u16</b></b> (uint16x8_t a, uint16_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_n_u32" type="checkbox"><label for="vmul_n_u32"><div>uint32x2_t <b><b>vmul_n_u32</b></b> (uint32x2_t a, uint32_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_n_u32" type="checkbox"><label for="vmulq_n_u32"><div>uint32x4_t <b><b>vmulq_n_u32</b></b> (uint32x4_t a, uint32_t b)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_n_f32" type="checkbox"><label for="vmul_n_f32"><div>float32x2_t <b><b>vmul_n_f32</b></b> (float32x2_t a, float32_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (by element). This instruction multiplies the floating-point vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_n_f32" type="checkbox"><label for="vmulq_n_f32"><div>float32x4_t <b><b>vmulq_n_f32</b></b> (float32x4_t a, float32_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (by element). This instruction multiplies the floating-point vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_n_f64" type="checkbox"><label for="vmul_n_f64"><div>float64x1_t <b><b>vmul_n_f64</b></b> (float64x1_t a, float64_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (by element). This instruction multiplies the floating-point vector elements in the first source SIMD&amp;FP register by the specified value in the second source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Dd,Dn,Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Vm.D[0] </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_n_f64" type="checkbox"><label for="vmulq_n_f64"><div>float64x2_t <b><b>vmulq_n_f64</b></b> (float64x2_t a, float64_t b)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2D,Vn.2D,Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.D[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_lane_s16" type="checkbox"><label for="vmul_lane_s16"><div>int16x4_t <b><b>vmul_lane_s16</b></b> (int16x4_t a, int16x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_lane_s16" type="checkbox"><label for="vmulq_lane_s16"><div>int16x8_t <b><b>vmulq_lane_s16</b></b> (int16x8_t a, int16x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_lane_s32" type="checkbox"><label for="vmul_lane_s32"><div>int32x2_t <b><b>vmul_lane_s32</b></b> (int32x2_t a, int32x2_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_lane_s32" type="checkbox"><label for="vmulq_lane_s32"><div>int32x4_t <b><b>vmulq_lane_s32</b></b> (int32x4_t a, int32x2_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_lane_u16" type="checkbox"><label for="vmul_lane_u16"><div>uint16x4_t <b><b>vmul_lane_u16</b></b> (uint16x4_t a, uint16x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_lane_u16" type="checkbox"><label for="vmulq_lane_u16"><div>uint16x8_t <b><b>vmulq_lane_u16</b></b> (uint16x8_t a, uint16x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_lane_u32" type="checkbox"><label for="vmul_lane_u32"><div>uint32x2_t <b><b>vmul_lane_u32</b></b> (uint32x2_t a, uint32x2_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_lane_u32" type="checkbox"><label for="vmulq_lane_u32"><div>uint32x4_t <b><b>vmulq_lane_u32</b></b> (uint32x4_t a, uint32x2_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_lane_f32" type="checkbox"><label for="vmul_lane_f32"><div>float32x2_t <b><b>vmul_lane_f32</b></b> (float32x2_t a, float32x2_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_lane_f32" type="checkbox"><label for="vmulq_lane_f32"><div>float32x4_t <b><b>vmulq_lane_f32</b></b> (float32x4_t a, float32x2_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmul_lane_f64" type="checkbox"><label for="vmul_lane_f64"><div>float64x1_t <b><b>vmul_lane_f64</b></b> (float64x1_t a, float64x1_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_lane_f64" type="checkbox"><label for="vmulq_lane_f64"><div>float64x2_t <b><b>vmulq_lane_f64</b></b> (float64x2_t a, float64x1_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmuls_lane_f32" type="checkbox"><label for="vmuls_lane_f32"><div>float32_t <b><b>vmuls_lane_f32</b></b> (float32_t a, float32x2_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmuld_lane_f64" type="checkbox"><label for="vmuld_lane_f64"><div>float64_t <b><b>vmuld_lane_f64</b></b> (float64_t a, float64x1_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_laneq_s16" type="checkbox"><label for="vmul_laneq_s16"><div>int16x4_t <b><b>vmul_laneq_s16</b></b> (int16x4_t a, int16x8_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_laneq_s16" type="checkbox"><label for="vmulq_laneq_s16"><div>int16x8_t <b><b>vmulq_laneq_s16</b></b> (int16x8_t a, int16x8_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_laneq_s32" type="checkbox"><label for="vmul_laneq_s32"><div>int32x2_t <b><b>vmul_laneq_s32</b></b> (int32x2_t a, int32x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_laneq_s32" type="checkbox"><label for="vmulq_laneq_s32"><div>int32x4_t <b><b>vmulq_laneq_s32</b></b> (int32x4_t a, int32x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_laneq_u16" type="checkbox"><label for="vmul_laneq_u16"><div>uint16x4_t <b><b>vmul_laneq_u16</b></b> (uint16x4_t a, uint16x8_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_laneq_u16" type="checkbox"><label for="vmulq_laneq_u16"><div>uint16x8_t <b><b>vmulq_laneq_u16</b></b> (uint16x8_t a, uint16x8_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_laneq_u32" type="checkbox"><label for="vmul_laneq_u32"><div>uint32x2_t <b><b>vmul_laneq_u32</b></b> (uint32x2_t a, uint32x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_laneq_u32" type="checkbox"><label for="vmulq_laneq_u32"><div>uint32x4_t <b><b>vmulq_laneq_u32</b></b> (uint32x4_t a, uint32x4_t v, const int lane)<span class="right">Multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mul-vector-multiply-vector">MUL</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if poly then
+        product = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2)&lt;esize-1:0&gt;;
+    else
+        product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_laneq_f32" type="checkbox"><label for="vmul_laneq_f32"><div>float32x2_t <b><b>vmul_laneq_f32</b></b> (float32x2_t a, float32x4_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_laneq_f32" type="checkbox"><label for="vmulq_laneq_f32"><div>float32x4_t <b><b>vmulq_laneq_f32</b></b> (float32x4_t a, float32x4_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmul_laneq_f64" type="checkbox"><label for="vmul_laneq_f64"><div>float64x1_t <b><b>vmul_laneq_f64</b></b> (float64x1_t a, float64x2_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmulq_laneq_f64" type="checkbox"><label for="vmulq_laneq_f64"><div>float64x2_t <b><b>vmulq_laneq_f64</b></b> (float64x2_t a, float64x2_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Vd.2D,Vn.2D,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmuls_laneq_f32" type="checkbox"><label for="vmuls_laneq_f32"><div>float32_t <b><b>vmuls_laneq_f32</b></b> (float32_t a, float32x4_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmuld_laneq_f64" type="checkbox"><label for="vmuld_laneq_f64"><div>float64_t <b><b>vmuld_laneq_f64</b></b> (float64_t a, float64x2_t v, const int lane)<span class="right">Floating-point multiply</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmul-vector-floating-point-multiply-vector">FMUL</a> Dd,Dn,Vm.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+v &rarr; Vm.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMul.3" title="function: bits(N) FPMul(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMul</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_n_s16" type="checkbox"><label for="vmull_n_s16"><div>int32x4_t <b><b>vmull_n_s16</b></b> (int16x4_t a, int16_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_n_s32" type="checkbox"><label for="vmull_n_s32"><div>int64x2_t <b><b>vmull_n_s32</b></b> (int32x2_t a, int32_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_n_u16" type="checkbox"><label for="vmull_n_u16"><div>uint32x4_t <b><b>vmull_n_u16</b></b> (uint16x4_t a, uint16_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_n_u32" type="checkbox"><label for="vmull_n_u32"><div>uint64x2_t <b><b>vmull_n_u32</b></b> (uint32x2_t a, uint32_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_n_s16" type="checkbox"><label for="vmull_high_n_s16"><div>int32x4_t <b><b>vmull_high_n_s16</b></b> (int16x8_t a, int16_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_n_s32" type="checkbox"><label for="vmull_high_n_s32"><div>int64x2_t <b><b>vmull_high_n_s32</b></b> (int32x4_t a, int32_t b)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_n_u16" type="checkbox"><label for="vmull_high_n_u16"><div>uint32x4_t <b><b>vmull_high_n_u16</b></b> (uint16x8_t a, uint16_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_n_u32" type="checkbox"><label for="vmull_high_n_u32"><div>uint64x2_t <b><b>vmull_high_n_u32</b></b> (uint32x4_t a, uint32_t b)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_lane_s16" type="checkbox"><label for="vmull_lane_s16"><div>int32x4_t <b><b>vmull_lane_s16</b></b> (int16x4_t a, int16x4_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_lane_s32" type="checkbox"><label for="vmull_lane_s32"><div>int64x2_t <b><b>vmull_lane_s32</b></b> (int32x2_t a, int32x2_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_lane_u16" type="checkbox"><label for="vmull_lane_u16"><div>uint32x4_t <b><b>vmull_lane_u16</b></b> (uint16x4_t a, uint16x4_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_lane_u32" type="checkbox"><label for="vmull_lane_u32"><div>uint64x2_t <b><b>vmull_lane_u32</b></b> (uint32x2_t a, uint32x2_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_lane_s16" type="checkbox"><label for="vmull_high_lane_s16"><div>int32x4_t <b><b>vmull_high_lane_s16</b></b> (int16x8_t a, int16x4_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_lane_s32" type="checkbox"><label for="vmull_high_lane_s32"><div>int64x2_t <b><b>vmull_high_lane_s32</b></b> (int32x4_t a, int32x2_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_lane_u16" type="checkbox"><label for="vmull_high_lane_u16"><div>uint32x4_t <b><b>vmull_high_lane_u16</b></b> (uint16x8_t a, uint16x4_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_lane_u32" type="checkbox"><label for="vmull_high_lane_u32"><div>uint64x2_t <b><b>vmull_high_lane_u32</b></b> (uint32x4_t a, uint32x2_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_laneq_s16" type="checkbox"><label for="vmull_laneq_s16"><div>int32x4_t <b><b>vmull_laneq_s16</b></b> (int16x4_t a, int16x8_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_laneq_s32" type="checkbox"><label for="vmull_laneq_s32"><div>int64x2_t <b><b>vmull_laneq_s32</b></b> (int32x2_t a, int32x4_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_laneq_u16" type="checkbox"><label for="vmull_laneq_u16"><div>uint32x4_t <b><b>vmull_laneq_u16</b></b> (uint16x4_t a, uint16x8_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_laneq_u32" type="checkbox"><label for="vmull_laneq_u32"><div>uint64x2_t <b><b>vmull_laneq_u32</b></b> (uint32x2_t a, uint32x4_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_laneq_s16" type="checkbox"><label for="vmull_high_laneq_s16"><div>int32x4_t <b><b>vmull_high_laneq_s16</b></b> (int16x8_t a, int16x8_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_laneq_s32" type="checkbox"><label for="vmull_high_laneq_s32"><div>int64x2_t <b><b>vmull_high_laneq_s32</b></b> (int32x4_t a, int32x4_t v, const int lane)<span class="right">Signed multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smull-smull2-vector-signed-multiply-long-vector">SMULL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_laneq_u16" type="checkbox"><label for="vmull_high_laneq_u16"><div>uint32x4_t <b><b>vmull_high_laneq_u16</b></b> (uint16x8_t a, uint16x8_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_laneq_u32" type="checkbox"><label for="vmull_high_laneq_u32"><div>uint64x2_t <b><b>vmull_high_laneq_u32</b></b> (uint32x4_t a, uint32x4_t v, const int lane)<span class="right">Unsigned multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umull-umull2-vector-unsigned-multiply-long-vector">UMULL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = (element1*element2)&lt;2*esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_n_s16" type="checkbox"><label for="vqdmull_n_s16"><div>int32x4_t <b><b>vqdmull_n_s16</b></b> (int16x4_t a, int16_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_n_s32" type="checkbox"><label for="vqdmull_n_s32"><div>int64x2_t <b><b>vqdmull_n_s32</b></b> (int32x2_t a, int32_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_n_s16" type="checkbox"><label for="vqdmull_high_n_s16"><div>int32x4_t <b><b>vqdmull_high_n_s16</b></b> (int16x8_t a, int16_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_n_s32" type="checkbox"><label for="vqdmull_high_n_s32"><div>int64x2_t <b><b>vqdmull_high_n_s32</b></b> (int32x4_t a, int32_t b)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_lane_s16" type="checkbox"><label for="vqdmull_lane_s16"><div>int32x4_t <b><b>vqdmull_lane_s16</b></b> (int16x4_t a, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_lane_s32" type="checkbox"><label for="vqdmull_lane_s32"><div>int64x2_t <b><b>vqdmull_lane_s32</b></b> (int32x2_t a, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmullh_lane_s16" type="checkbox"><label for="vqdmullh_lane_s16"><div>int32_t <b><b>vqdmullh_lane_s16</b></b> (int16_t a, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Sd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulls_lane_s32" type="checkbox"><label for="vqdmulls_lane_s32"><div>int64_t <b><b>vqdmulls_lane_s32</b></b> (int32_t a, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Dd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_lane_s16" type="checkbox"><label for="vqdmull_high_lane_s16"><div>int32x4_t <b><b>vqdmull_high_lane_s16</b></b> (int16x8_t a, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_lane_s32" type="checkbox"><label for="vqdmull_high_lane_s32"><div>int64x2_t <b><b>vqdmull_high_lane_s32</b></b> (int32x4_t a, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_laneq_s16" type="checkbox"><label for="vqdmull_laneq_s16"><div>int32x4_t <b><b>vqdmull_laneq_s16</b></b> (int16x4_t a, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.4S,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_laneq_s32" type="checkbox"><label for="vqdmull_laneq_s32"><div>int64x2_t <b><b>vqdmull_laneq_s32</b></b> (int32x2_t a, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Vd.2D,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmullh_laneq_s16" type="checkbox"><label for="vqdmullh_laneq_s16"><div>int32_t <b><b>vqdmullh_laneq_s16</b></b> (int16_t a, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Sd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulls_laneq_s32" type="checkbox"><label for="vqdmulls_laneq_s32"><div>int64_t <b><b>vqdmulls_laneq_s32</b></b> (int32_t a, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL</a> Dd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_laneq_s16" type="checkbox"><label for="vqdmull_high_laneq_s16"><div>int32x4_t <b><b>vqdmull_high_laneq_s16</b></b> (int16x8_t a, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.4S,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmull_high_laneq_s32" type="checkbox"><label for="vqdmull_high_laneq_s32"><div>int64x2_t <b><b>vqdmull_high_laneq_s32</b></b> (int32x4_t a, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmull-sqdmull2-vector-signed-saturating-doubling-multiply-long">SQDMULL2</a> Vd.2D,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_n_s16" type="checkbox"><label for="vqdmulh_n_s16"><div>int16x4_t <b><b>vqdmulh_n_s16</b></b> (int16x4_t a, int16_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_n_s16" type="checkbox"><label for="vqdmulhq_n_s16"><div>int16x8_t <b><b>vqdmulhq_n_s16</b></b> (int16x8_t a, int16_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_n_s32" type="checkbox"><label for="vqdmulh_n_s32"><div>int32x2_t <b><b>vqdmulh_n_s32</b></b> (int32x2_t a, int32_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_n_s32" type="checkbox"><label for="vqdmulhq_n_s32"><div>int32x4_t <b><b>vqdmulhq_n_s32</b></b> (int32x4_t a, int32_t b)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_lane_s16" type="checkbox"><label for="vqdmulh_lane_s16"><div>int16x4_t <b><b>vqdmulh_lane_s16</b></b> (int16x4_t a, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_lane_s16" type="checkbox"><label for="vqdmulhq_lane_s16"><div>int16x8_t <b><b>vqdmulhq_lane_s16</b></b> (int16x8_t a, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_lane_s32" type="checkbox"><label for="vqdmulh_lane_s32"><div>int32x2_t <b><b>vqdmulh_lane_s32</b></b> (int32x2_t a, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_lane_s32" type="checkbox"><label for="vqdmulhq_lane_s32"><div>int32x4_t <b><b>vqdmulhq_lane_s32</b></b> (int32x4_t a, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhh_lane_s16" type="checkbox"><label for="vqdmulhh_lane_s16"><div>int16_t <b><b>vqdmulhh_lane_s16</b></b> (int16_t a, int16x4_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Hd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+v &rarr; Vm.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhs_lane_s32" type="checkbox"><label for="vqdmulhs_lane_s32"><div>int32_t <b><b>vqdmulhs_lane_s32</b></b> (int32_t a, int32x2_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_laneq_s16" type="checkbox"><label for="vqdmulh_laneq_s16"><div>int16x4_t <b><b>vqdmulh_laneq_s16</b></b> (int16x4_t a, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_laneq_s16" type="checkbox"><label for="vqdmulhq_laneq_s16"><div>int16x8_t <b><b>vqdmulhq_laneq_s16</b></b> (int16x8_t a, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulh_laneq_s32" type="checkbox"><label for="vqdmulh_laneq_s32"><div>int32x2_t <b><b>vqdmulh_laneq_s32</b></b> (int32x2_t a, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhq_laneq_s32" type="checkbox"><label for="vqdmulhq_laneq_s32"><div>int32x4_t <b><b>vqdmulhq_laneq_s32</b></b> (int32x4_t a, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhh_laneq_s16" type="checkbox"><label for="vqdmulhh_laneq_s16"><div>int16_t <b><b>vqdmulhh_laneq_s16</b></b> (int16_t a, int16x8_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Hd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmulhs_laneq_s32" type="checkbox"><label for="vqdmulhs_laneq_s32"><div>int32_t <b><b>vqdmulhs_laneq_s32</b></b> (int32_t a, int32x4_t v, const int lane)<span class="right">Signed saturating doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmulh-vector-signed-saturating-doubling-multiply-returning-high-half">SQDMULH</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_n_s16" type="checkbox"><label for="vqrdmulh_n_s16"><div>int16x4_t <b><b>vqrdmulh_n_s16</b></b> (int16x4_t a, int16_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_n_s16" type="checkbox"><label for="vqrdmulhq_n_s16"><div>int16x8_t <b><b>vqrdmulhq_n_s16</b></b> (int16x8_t a, int16_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_n_s32" type="checkbox"><label for="vqrdmulh_n_s32"><div>int32x2_t <b><b>vqrdmulh_n_s32</b></b> (int32x2_t a, int32_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_n_s32" type="checkbox"><label for="vqrdmulhq_n_s32"><div>int32x4_t <b><b>vqrdmulhq_n_s32</b></b> (int32x4_t a, int32_t b)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_lane_s16" type="checkbox"><label for="vqrdmulh_lane_s16"><div>int16x4_t <b><b>vqrdmulh_lane_s16</b></b> (int16x4_t a, int16x4_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_lane_s16" type="checkbox"><label for="vqrdmulhq_lane_s16"><div>int16x8_t <b><b>vqrdmulhq_lane_s16</b></b> (int16x8_t a, int16x4_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_lane_s32" type="checkbox"><label for="vqrdmulh_lane_s32"><div>int32x2_t <b><b>vqrdmulh_lane_s32</b></b> (int32x2_t a, int32x2_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.2S <br />
+0 &le; lane &le; 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_lane_s32" type="checkbox"><label for="vqrdmulhq_lane_s32"><div>int32x4_t <b><b>vqrdmulhq_lane_s32</b></b> (int32x4_t a, int32x2_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.2S <br />
+0 &le; lane &le; 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhh_lane_s16" type="checkbox"><label for="vqrdmulhh_lane_s16"><div>int16_t <b><b>vqrdmulhh_lane_s16</b></b> (int16_t a, int16x4_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Hd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+v &rarr; Vm.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhs_lane_s32" type="checkbox"><label for="vqrdmulhs_lane_s32"><div>int32_t <b><b>vqrdmulhs_lane_s32</b></b> (int32_t a, int32x2_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_laneq_s16" type="checkbox"><label for="vqrdmulh_laneq_s16"><div>int16x4_t <b><b>vqrdmulh_laneq_s16</b></b> (int16x4_t a, int16x8_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4H,Vn.4H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_laneq_s16" type="checkbox"><label for="vqrdmulhq_laneq_s16"><div>int16x8_t <b><b>vqrdmulhq_laneq_s16</b></b> (int16x8_t a, int16x8_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.8H,Vn.8H,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulh_laneq_s32" type="checkbox"><label for="vqrdmulh_laneq_s32"><div>int32x2_t <b><b>vqrdmulh_laneq_s32</b></b> (int32x2_t a, int32x4_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.2S,Vn.2S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhq_laneq_s32" type="checkbox"><label for="vqrdmulhq_laneq_s32"><div>int32x4_t <b><b>vqrdmulhq_laneq_s32</b></b> (int32x4_t a, int32x4_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Vd.4S,Vn.4S,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhh_laneq_s16" type="checkbox"><label for="vqrdmulhh_laneq_s16"><div>int16_t <b><b>vqrdmulhh_laneq_s16</b></b> (int16_t a, int16x8_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Hd,Hn,Vm.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn <br />
+v &rarr; Vm.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqrdmulhs_laneq_s32" type="checkbox"><label for="vqrdmulhs_laneq_s32"><div>int32_t <b><b>vqrdmulhs_laneq_s32</b></b> (int32_t a, int32x4_t v, const int lane)<span class="right">Signed saturating rounding doubling multiply returning high half</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&amp;FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqrdmulh-vector-signed-saturating-rounding-doubling-multiply-returning-high-half">SQRDMULH</a> Sd,Sn,Vm.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+v &rarr; Vm.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 &lt;&lt; (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(product &gt;&gt; esize, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmla_n_s16" type="checkbox"><label for="vmla_n_s16"><div>int16x4_t <b><b>vmla_n_s16</b></b> (int16x4_t a, int16x4_t b, int16_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_n_s16" type="checkbox"><label for="vmlaq_n_s16"><div>int16x8_t <b><b>vmlaq_n_s16</b></b> (int16x8_t a, int16x8_t b, int16_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_n_s32" type="checkbox"><label for="vmla_n_s32"><div>int32x2_t <b><b>vmla_n_s32</b></b> (int32x2_t a, int32x2_t b, int32_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_n_s32" type="checkbox"><label for="vmlaq_n_s32"><div>int32x4_t <b><b>vmlaq_n_s32</b></b> (int32x4_t a, int32x4_t b, int32_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_n_u16" type="checkbox"><label for="vmla_n_u16"><div>uint16x4_t <b><b>vmla_n_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_n_u16" type="checkbox"><label for="vmlaq_n_u16"><div>uint16x8_t <b><b>vmlaq_n_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_n_u32" type="checkbox"><label for="vmla_n_u32"><div>uint32x2_t <b><b>vmla_n_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_n_u32" type="checkbox"><label for="vmlaq_n_u32"><div>uint32x4_t <b><b>vmlaq_n_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32_t c)<span class="right">Multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mla-vector-multiply-add-to-accumulator-vector">MLA</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmla_n_f32" type="checkbox"><label for="vmla_n_f32"><div>float32x2_t <b><b>vmla_n_f32</b></b> (float32x2_t a, float32x2_t b, float32_t c)<span class="right">Undefined</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] + (b[i] * c) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlaq_n_f32" type="checkbox"><label for="vmlaq_n_f32"><div>float32x4_t <b><b>vmlaq_n_f32</b></b> (float32x4_t a, float32x4_t b, float32_t c)<span class="right">Undefined</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] + (b[i] * c) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_n_s16" type="checkbox"><label for="vmlal_n_s16"><div>int32x4_t <b><b>vmlal_n_s16</b></b> (int32x4_t a, int16x4_t b, int16_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_n_s32" type="checkbox"><label for="vmlal_n_s32"><div>int64x2_t <b><b>vmlal_n_s32</b></b> (int64x2_t a, int32x2_t b, int32_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_n_u16" type="checkbox"><label for="vmlal_n_u16"><div>uint32x4_t <b><b>vmlal_n_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_n_u32" type="checkbox"><label for="vmlal_n_u32"><div>uint64x2_t <b><b>vmlal_n_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_n_s16" type="checkbox"><label for="vmlal_high_n_s16"><div>int32x4_t <b><b>vmlal_high_n_s16</b></b> (int32x4_t a, int16x8_t b, int16_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_n_s32" type="checkbox"><label for="vmlal_high_n_s32"><div>int64x2_t <b><b>vmlal_high_n_s32</b></b> (int64x2_t a, int32x4_t b, int32_t c)<span class="right">Signed multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlal-smlal2-vector-signed-multiply-add-long-vector">SMLAL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_n_u16" type="checkbox"><label for="vmlal_high_n_u16"><div>uint32x4_t <b><b>vmlal_high_n_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlal_high_n_u32" type="checkbox"><label for="vmlal_high_n_u32"><div>uint64x2_t <b><b>vmlal_high_n_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32_t c)<span class="right">Unsigned multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&amp;FP register by the corresponding vector elements of the second source SIMD&amp;FP register, and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlal-umlal2-vector-unsigned-multiply-add-long-vector">UMLAL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_n_s16" type="checkbox"><label for="vqdmlal_n_s16"><div>int32x4_t <b><b>vqdmlal_n_s16</b></b> (int32x4_t a, int16x4_t b, int16_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_n_s32" type="checkbox"><label for="vqdmlal_n_s32"><div>int64x2_t <b><b>vqdmlal_n_s32</b></b> (int64x2_t a, int32x2_t b, int32_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_n_s16" type="checkbox"><label for="vqdmlal_high_n_s16"><div>int32x4_t <b><b>vqdmlal_high_n_s16</b></b> (int32x4_t a, int16x8_t b, int16_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlal_high_n_s32" type="checkbox"><label for="vqdmlal_high_n_s32"><div>int64x2_t <b><b>vqdmlal_high_n_s32</b></b> (int64x2_t a, int32x4_t b, int32_t c)<span class="right">Signed saturating doubling multiply-add long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlal-sqdmlal2-vector-signed-saturating-doubling-multiply-add-long">SQDMLAL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmls_n_s16" type="checkbox"><label for="vmls_n_s16"><div>int16x4_t <b><b>vmls_n_s16</b></b> (int16x4_t a, int16x4_t b, int16_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_n_s16" type="checkbox"><label for="vmlsq_n_s16"><div>int16x8_t <b><b>vmlsq_n_s16</b></b> (int16x8_t a, int16x8_t b, int16_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_n_s32" type="checkbox"><label for="vmls_n_s32"><div>int32x2_t <b><b>vmls_n_s32</b></b> (int32x2_t a, int32x2_t b, int32_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_n_s32" type="checkbox"><label for="vmlsq_n_s32"><div>int32x4_t <b><b>vmlsq_n_s32</b></b> (int32x4_t a, int32x4_t b, int32_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_n_u16" type="checkbox"><label for="vmls_n_u16"><div>uint16x4_t <b><b>vmls_n_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4H,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_n_u16" type="checkbox"><label for="vmlsq_n_u16"><div>uint16x8_t <b><b>vmlsq_n_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.8H,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_n_u32" type="checkbox"><label for="vmls_n_u32"><div>uint32x2_t <b><b>vmls_n_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_n_u32" type="checkbox"><label for="vmlsq_n_u32"><div>uint32x4_t <b><b>vmlsq_n_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mls-vector-multiply-subtract-from-accumulator-vector">MLS</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    product = (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element1)*<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(element2))&lt;esize-1:0&gt;;
+    if sub_op then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] - product;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize] + product;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmls_n_f32" type="checkbox"><label for="vmls_n_f32"><div>float32x2_t <b><b>vmls_n_f32</b></b> (float32x2_t a, float32x2_t b, float32_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * c) for i = 0 to 1
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsq_n_f32" type="checkbox"><label for="vmlsq_n_f32"><div>float32x4_t <b><b>vmlsq_n_f32</b></b> (float32x4_t a, float32x4_t b, float32_t c)<span class="right">Multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre>RESULT[i] = a[i] - (b[i] * c) for i = 0 to 3
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; N/A <br />
+b &rarr; N/A <br />
+c &rarr; N/A </pre>      <h4>Results</h4>      <pre>N/A &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_n_s16" type="checkbox"><label for="vmlsl_n_s16"><div>int32x4_t <b><b>vmlsl_n_s16</b></b> (int32x4_t a, int16x4_t b, int16_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_n_s32" type="checkbox"><label for="vmlsl_n_s32"><div>int64x2_t <b><b>vmlsl_n_s32</b></b> (int64x2_t a, int32x2_t b, int32_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_n_u16" type="checkbox"><label for="vmlsl_n_u16"><div>uint32x4_t <b><b>vmlsl_n_u16</b></b> (uint32x4_t a, uint16x4_t b, uint16_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&amp;FP registers, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_n_u32" type="checkbox"><label for="vmlsl_n_u32"><div>uint64x2_t <b><b>vmlsl_n_u32</b></b> (uint64x2_t a, uint32x2_t b, uint32_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&amp;FP register by the specified vector element of the second source SIMD&amp;FP register, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_n_s16" type="checkbox"><label for="vmlsl_high_n_s16"><div>int32x4_t <b><b>vmlsl_high_n_s16</b></b> (int32x4_t a, int16x8_t b, int16_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector, by element). This instruction multiplies each signed integer value in the lower or upper half of the vector of the first source SIMD&amp;FP register by the specified signed element of the second source SIMD&amp;FP register, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_n_s32" type="checkbox"><label for="vmlsl_high_n_s32"><div>int64x2_t <b><b>vmlsl_high_n_s32</b></b> (int64x2_t a, int32x4_t b, int32_t c)<span class="right">Signed multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Multiply-Subtract Long (vector, by element). This instruction multiplies each signed integer value in the lower or upper half of the vector of the first source SIMD&amp;FP register by the specified signed element of the second source SIMD&amp;FP register, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smlsl-smlsl2-vector-signed-multiply-subtract-long-vector">SMLSL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_n_u16" type="checkbox"><label for="vmlsl_high_n_u16"><div>uint32x4_t <b><b>vmlsl_high_n_u16</b></b> (uint32x4_t a, uint16x8_t b, uint16_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&amp;FP register by the specified vector element of the second source SIMD&amp;FP register, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmlsl_high_n_u32" type="checkbox"><label for="vmlsl_high_n_u32"><div>uint64x2_t <b><b>vmlsl_high_n_u32</b></b> (uint64x2_t a, uint32x4_t b, uint32_t c)<span class="right">Unsigned multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&amp;FP register by the specified vector element of the second source SIMD&amp;FP register, and subtracts the results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umlsl-umlsl2-vector-unsigned-multiply-subtract-long-vector">UMLSL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize], unsigned);
+    product = (element1*element2)&lt;2*esize-1:0&gt;;
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] - product;
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize] + product;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = accum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_n_s16" type="checkbox"><label for="vqdmlsl_n_s16"><div>int32x4_t <b><b>vqdmlsl_n_s16</b></b> (int32x4_t a, int16x4_t b, int16_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long (by element). This instruction multiplies each signed integer value in the lower or upper half of the vector of the first source SIMD&amp;FP register by the specified signed element of the second source SIMD&amp;FP register, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.4S,Vn.4H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_n_s32" type="checkbox"><label for="vqdmlsl_n_s32"><div>int64x2_t <b><b>vqdmlsl_n_s32</b></b> (int64x2_t a, int32x2_t b, int32_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL</a> Vd.2D,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_n_s16" type="checkbox"><label for="vqdmlsl_high_n_s16"><div>int32x4_t <b><b>vqdmlsl_high_n_s16</b></b> (int32x4_t a, int16x8_t b, int16_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.4S,Vn.8H,Vm.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H <br />
+c &rarr; Vm.H[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqdmlsl_high_n_s32" type="checkbox"><label for="vqdmlsl_high_n_s32"><div>int64x2_t <b><b>vqdmlsl_high_n_s32</b></b> (int64x2_t a, int32x4_t b, int32_t c)<span class="right">Signed saturating doubling multiply-subtract long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqdmlsl-sqdmlsl2-vector-signed-saturating-doubling-multiply-subtract-long">SQDMLSL2</a> Vd.2D,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S <br />
+c &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize]);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize]);
+    (product, sat1) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) - <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    else
+        accum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 2*esize]) + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(product);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize], sat2) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabs_s8" type="checkbox"><label for="vabs_s8"><div>int8x8_t <b><b>vabs_s8</b></b> (int8x8_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabsq_s8" type="checkbox"><label for="vabsq_s8"><div>int8x16_t <b><b>vabsq_s8</b></b> (int8x16_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabs_s16" type="checkbox"><label for="vabs_s16"><div>int16x4_t <b><b>vabs_s16</b></b> (int16x4_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabsq_s16" type="checkbox"><label for="vabsq_s16"><div>int16x8_t <b><b>vabsq_s16</b></b> (int16x8_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabs_s32" type="checkbox"><label for="vabs_s32"><div>int32x2_t <b><b>vabs_s32</b></b> (int32x2_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabsq_s32" type="checkbox"><label for="vabsq_s32"><div>int32x4_t <b><b>vabsq_s32</b></b> (int32x4_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabs_f32" type="checkbox"><label for="vabs_f32"><div>float32x2_t <b><b>vabs_f32</b></b> (float32x2_t a)<span class="right">Floating-point absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabs-vector-floating-point-absolute-value-vector">FABS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabsq_f32" type="checkbox"><label for="vabsq_f32"><div>float32x4_t <b><b>vabsq_f32</b></b> (float32x4_t a)<span class="right">Floating-point absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabs-vector-floating-point-absolute-value-vector">FABS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vabs_s64" type="checkbox"><label for="vabs_s64"><div>int64x1_t <b><b>vabs_s64</b></b> (int64x1_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabsd_s64" type="checkbox"><label for="vabsd_s64"><div>int64_t <b><b>vabsd_s64</b></b> (int64_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabsq_s64" type="checkbox"><label for="vabsq_s64"><div>int64x2_t <b><b>vabsq_s64</b></b> (int64x2_t a)<span class="right">Absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/abs-absolute-value-vector">ABS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabs_f64" type="checkbox"><label for="vabs_f64"><div>float64x1_t <b><b>vabs_f64</b></b> (float64x1_t a)<span class="right">Floating-point absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabs-vector-floating-point-absolute-value-vector">FABS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vabsq_f64" type="checkbox"><label for="vabsq_f64"><div>float64x2_t <b><b>vabsq_f64</b></b> (float64x2_t a)<span class="right">Floating-point absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fabs-vector-floating-point-absolute-value-vector">FABS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqabs_s8" type="checkbox"><label for="vqabs_s8"><div>int8x8_t <b><b>vqabs_s8</b></b> (int8x8_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqabsq_s8" type="checkbox"><label for="vqabsq_s8"><div>int8x16_t <b><b>vqabsq_s8</b></b> (int8x16_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqabs_s16" type="checkbox"><label for="vqabs_s16"><div>int16x4_t <b><b>vqabs_s16</b></b> (int16x4_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqabsq_s16" type="checkbox"><label for="vqabsq_s16"><div>int16x8_t <b><b>vqabsq_s16</b></b> (int16x8_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqabs_s32" type="checkbox"><label for="vqabs_s32"><div>int32x2_t <b><b>vqabs_s32</b></b> (int32x2_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqabsq_s32" type="checkbox"><label for="vqabsq_s32"><div>int32x4_t <b><b>vqabsq_s32</b></b> (int32x4_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqabs_s64" type="checkbox"><label for="vqabs_s64"><div>int64x1_t <b><b>vqabs_s64</b></b> (int64x1_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqabsq_s64" type="checkbox"><label for="vqabsq_s64"><div>int64x2_t <b><b>vqabsq_s64</b></b> (int64x2_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqabsb_s8" type="checkbox"><label for="vqabsb_s8"><div>int8_t <b><b>vqabsb_s8</b></b> (int8_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Bd,Bn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqabsh_s16" type="checkbox"><label for="vqabsh_s16"><div>int16_t <b><b>vqabsh_s16</b></b> (int16_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Hd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqabss_s32" type="checkbox"><label for="vqabss_s32"><div>int32_t <b><b>vqabss_s32</b></b> (int32_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the saturated absolute value of each element into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqabsd_s64" type="checkbox"><label for="vqabsd_s64"><div>int64_t <b><b>vqabsd_s64</b></b> (int64_t a)<span class="right">Signed saturating absolute value</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&amp;FP register, puts the saturated absolute value of each element into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqabs-signed-saturating-absolute-value">SQABS</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vneg_s8" type="checkbox"><label for="vneg_s8"><div>int8x8_t <b><b>vneg_s8</b></b> (int8x8_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vnegq_s8" type="checkbox"><label for="vnegq_s8"><div>int8x16_t <b><b>vnegq_s8</b></b> (int8x16_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vneg_s16" type="checkbox"><label for="vneg_s16"><div>int16x4_t <b><b>vneg_s16</b></b> (int16x4_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vnegq_s16" type="checkbox"><label for="vnegq_s16"><div>int16x8_t <b><b>vnegq_s16</b></b> (int16x8_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vneg_s32" type="checkbox"><label for="vneg_s32"><div>int32x2_t <b><b>vneg_s32</b></b> (int32x2_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vnegq_s32" type="checkbox"><label for="vnegq_s32"><div>int32x4_t <b><b>vnegq_s32</b></b> (int32x4_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vneg_f32" type="checkbox"><label for="vneg_f32"><div>float32x2_t <b><b>vneg_f32</b></b> (float32x2_t a)<span class="right">Floating-point negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fneg-vector-floating-point-negate-vector">FNEG</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vnegq_f32" type="checkbox"><label for="vnegq_f32"><div>float32x4_t <b><b>vnegq_f32</b></b> (float32x4_t a)<span class="right">Floating-point negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fneg-vector-floating-point-negate-vector">FNEG</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vneg_s64" type="checkbox"><label for="vneg_s64"><div>int64x1_t <b><b>vneg_s64</b></b> (int64x1_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vnegd_s64" type="checkbox"><label for="vnegd_s64"><div>int64_t <b><b>vnegd_s64</b></b> (int64_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vnegq_s64" type="checkbox"><label for="vnegq_s64"><div>int64x2_t <b><b>vnegq_s64</b></b> (int64x2_t a)<span class="right">Negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Negate (vector). This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/neg-vector-negate-vector">NEG</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vneg_f64" type="checkbox"><label for="vneg_f64"><div>float64x1_t <b><b>vneg_f64</b></b> (float64x1_t a)<span class="right">Floating-point negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fneg-vector-floating-point-negate-vector">FNEG</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vnegq_f64" type="checkbox"><label for="vnegq_f64"><div>float64x2_t <b><b>vnegq_f64</b></b> (float64x2_t a)<span class="right">Floating-point negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&amp;FP register, writes the result to a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fneg-vector-floating-point-negate-vector">FNEG</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    if neg then
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element);
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAbs.1" title="function: bits(N) FPAbs(bits(N) op)">FPAbs</a>(element);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqneg_s8" type="checkbox"><label for="vqneg_s8"><div>int8x8_t <b><b>vqneg_s8</b></b> (int8x8_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqnegq_s8" type="checkbox"><label for="vqnegq_s8"><div>int8x16_t <b><b>vqnegq_s8</b></b> (int8x16_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqneg_s16" type="checkbox"><label for="vqneg_s16"><div>int16x4_t <b><b>vqneg_s16</b></b> (int16x4_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqnegq_s16" type="checkbox"><label for="vqnegq_s16"><div>int16x8_t <b><b>vqnegq_s16</b></b> (int16x8_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqneg_s32" type="checkbox"><label for="vqneg_s32"><div>int32x2_t <b><b>vqneg_s32</b></b> (int32x2_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqnegq_s32" type="checkbox"><label for="vqnegq_s32"><div>int32x4_t <b><b>vqnegq_s32</b></b> (int32x4_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqneg_s64" type="checkbox"><label for="vqneg_s64"><div>int64x1_t <b><b>vqneg_s64</b></b> (int64x1_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqnegq_s64" type="checkbox"><label for="vqnegq_s64"><div>int64x2_t <b><b>vqnegq_s64</b></b> (int64x2_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqnegb_s8" type="checkbox"><label for="vqnegb_s8"><div>int8_t <b><b>vqnegb_s8</b></b> (int8_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Bd,Bn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Bn </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqnegh_s16" type="checkbox"><label for="vqnegh_s16"><div>int16_t <b><b>vqnegh_s16</b></b> (int16_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Hd,Hn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Hn </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqnegs_s32" type="checkbox"><label for="vqnegs_s32"><div>int32_t <b><b>vqnegs_s32</b></b> (int32_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqnegd_s64" type="checkbox"><label for="vqnegd_s64"><div>int64_t <b><b>vqnegd_s64</b></b> (int64_t a)<span class="right">Signed saturating negate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed saturating Negate. This instruction reads each vector element from the source SIMD&amp;FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sqneg-signed-saturating-negate">SQNEG</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SInt.1" title="function: integer SInt(bits(N) x)">SInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Abs.1" title="function: integer Abs(integer x)">Abs</a>(element);
+    (<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize], sat) = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignedSatQ.2" title="function: (bits(N), boolean) SignedSatQ(integer i, integer N)">SignedSatQ</a>(element, esize);
+    if sat then FPSR.QC = '1';
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcls_s8" type="checkbox"><label for="vcls_s8"><div>int8x8_t <b><b>vcls_s8</b></b> (int8x8_t a)<span class="right">Count leading sign bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The count does not include the most significant bit itself.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cls-vector-count-leading-sign-bits-vector">CLS</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclsq_s8" type="checkbox"><label for="vclsq_s8"><div>int8x16_t <b><b>vclsq_s8</b></b> (int8x16_t a)<span class="right">Count leading sign bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The count does not include the most significant bit itself.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cls-vector-count-leading-sign-bits-vector">CLS</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcls_s16" type="checkbox"><label for="vcls_s16"><div>int16x4_t <b><b>vcls_s16</b></b> (int16x4_t a)<span class="right">Count leading sign bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The count does not include the most significant bit itself.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cls-vector-count-leading-sign-bits-vector">CLS</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclsq_s16" type="checkbox"><label for="vclsq_s16"><div>int16x8_t <b><b>vclsq_s16</b></b> (int16x8_t a)<span class="right">Count leading sign bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The count does not include the most significant bit itself.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cls-vector-count-leading-sign-bits-vector">CLS</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcls_s32" type="checkbox"><label for="vcls_s32"><div>int32x2_t <b><b>vcls_s32</b></b> (int32x2_t a)<span class="right">Count leading sign bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The count does not include the most significant bit itself.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cls-vector-count-leading-sign-bits-vector">CLS</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclsq_s32" type="checkbox"><label for="vclsq_s32"><div>int32x4_t <b><b>vclsq_s32</b></b> (int32x4_t a)<span class="right">Count leading sign bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The count does not include the most significant bit itself.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cls-vector-count-leading-sign-bits-vector">CLS</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclz_s8" type="checkbox"><label for="vclz_s8"><div>int8x8_t <b><b>vclz_s8</b></b> (int8x8_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclzq_s8" type="checkbox"><label for="vclzq_s8"><div>int8x16_t <b><b>vclzq_s8</b></b> (int8x16_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclz_s16" type="checkbox"><label for="vclz_s16"><div>int16x4_t <b><b>vclz_s16</b></b> (int16x4_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclzq_s16" type="checkbox"><label for="vclzq_s16"><div>int16x8_t <b><b>vclzq_s16</b></b> (int16x8_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclz_s32" type="checkbox"><label for="vclz_s32"><div>int32x2_t <b><b>vclz_s32</b></b> (int32x2_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclzq_s32" type="checkbox"><label for="vclzq_s32"><div>int32x4_t <b><b>vclzq_s32</b></b> (int32x4_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclz_u8" type="checkbox"><label for="vclz_u8"><div>uint8x8_t <b><b>vclz_u8</b></b> (uint8x8_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclzq_u8" type="checkbox"><label for="vclzq_u8"><div>uint8x16_t <b><b>vclzq_u8</b></b> (uint8x16_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclz_u16" type="checkbox"><label for="vclz_u16"><div>uint16x4_t <b><b>vclz_u16</b></b> (uint16x4_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclzq_u16" type="checkbox"><label for="vclzq_u16"><div>uint16x8_t <b><b>vclzq_u16</b></b> (uint16x8_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclz_u32" type="checkbox"><label for="vclz_u32"><div>uint32x2_t <b><b>vclz_u32</b></b> (uint32x2_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vclzq_u32" type="checkbox"><label for="vclzq_u32"><div>uint32x4_t <b><b>vclzq_u32</b></b> (uint32x4_t a)<span class="right">Count leading zero bits</span></div></label><article>      <h4>Description</h4><p><p class="aml">Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/clz-vector-count-leading-zero-bits-vector">CLZ</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#CountOp_CLS" title="enumeration CountOp     {CountOp_CLZ, CountOp_CLS, CountOp_CNT}">CountOp_CLS</a> then
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingSignBits.1" title="function: integer CountLeadingSignBits(bits(N) x)">CountLeadingSignBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    else
+        count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.CountLeadingZeroBits.1" title="function: integer CountLeadingZeroBits(bits(N) x)">CountLeadingZeroBits</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcnt_s8" type="checkbox"><label for="vcnt_s8"><div>int8x8_t <b><b>vcnt_s8</b></b> (int8x8_t a)<span class="right">Population count per byte</span></div></label><article>      <h4>Description</h4><p><p class="aml">Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cnt-population-count-per-byte">CNT</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitCount.1" title="function: integer BitCount(bits(N) x)">BitCount</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcntq_s8" type="checkbox"><label for="vcntq_s8"><div>int8x16_t <b><b>vcntq_s8</b></b> (int8x16_t a)<span class="right">Population count per byte</span></div></label><article>      <h4>Description</h4><p><p class="aml">Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cnt-population-count-per-byte">CNT</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitCount.1" title="function: integer BitCount(bits(N) x)">BitCount</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcnt_u8" type="checkbox"><label for="vcnt_u8"><div>uint8x8_t <b><b>vcnt_u8</b></b> (uint8x8_t a)<span class="right">Population count per byte</span></div></label><article>      <h4>Description</h4><p><p class="aml">Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cnt-population-count-per-byte">CNT</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitCount.1" title="function: integer BitCount(bits(N) x)">BitCount</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcntq_u8" type="checkbox"><label for="vcntq_u8"><div>uint8x16_t <b><b>vcntq_u8</b></b> (uint8x16_t a)<span class="right">Population count per byte</span></div></label><article>      <h4>Description</h4><p><p class="aml">Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cnt-population-count-per-byte">CNT</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitCount.1" title="function: integer BitCount(bits(N) x)">BitCount</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcnt_p8" type="checkbox"><label for="vcnt_p8"><div>poly8x8_t <b><b>vcnt_p8</b></b> (poly8x8_t a)<span class="right">Population count per byte</span></div></label><article>      <h4>Description</h4><p><p class="aml">Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cnt-population-count-per-byte">CNT</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitCount.1" title="function: integer BitCount(bits(N) x)">BitCount</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcntq_p8" type="checkbox"><label for="vcntq_p8"><div>poly8x16_t <b><b>vcntq_p8</b></b> (poly8x16_t a)<span class="right">Population count per byte</span></div></label><article>      <h4>Description</h4><p><p class="aml">Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cnt-population-count-per-byte">CNT</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitCount.1" title="function: integer BitCount(bits(N) x)">BitCount</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize]);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = count&lt;esize-1:0&gt;;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecpe_u32" type="checkbox"><label for="vrecpe_u32"><div>uint32x2_t <b><b>vrecpe_u32</b></b> (uint32x2_t a)<span class="right">Unsigned reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&amp;FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urecpe-unsigned-reciprocal-estimate">URECPE</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 32];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedRecipEstimate.1" title="function: bits(N) UnsignedRecipEstimate(bits(N) operand)">UnsignedRecipEstimate</a>(element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecpeq_u32" type="checkbox"><label for="vrecpeq_u32"><div>uint32x4_t <b><b>vrecpeq_u32</b></b> (uint32x4_t a)<span class="right">Unsigned reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&amp;FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/urecpe-unsigned-reciprocal-estimate">URECPE</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 32];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedRecipEstimate.1" title="function: bits(N) UnsignedRecipEstimate(bits(N) operand)">UnsignedRecipEstimate</a>(element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecpe_f32" type="checkbox"><label for="vrecpe_f32"><div>float32x2_t <b><b>vrecpe_f32</b></b> (float32x2_t a)<span class="right">Floating-point reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpe-floating-point-reciprocal-estimate">FRECPE</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecipEstimate.2" title="function: bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)">FPRecipEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecpeq_f32" type="checkbox"><label for="vrecpeq_f32"><div>float32x4_t <b><b>vrecpeq_f32</b></b> (float32x4_t a)<span class="right">Floating-point reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpe-floating-point-reciprocal-estimate">FRECPE</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecipEstimate.2" title="function: bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)">FPRecipEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecpe_f64" type="checkbox"><label for="vrecpe_f64"><div>float64x1_t <b><b>vrecpe_f64</b></b> (float64x1_t a)<span class="right">Floating-point reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpe-floating-point-reciprocal-estimate">FRECPE</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecipEstimate.2" title="function: bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)">FPRecipEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpeq_f64" type="checkbox"><label for="vrecpeq_f64"><div>float64x2_t <b><b>vrecpeq_f64</b></b> (float64x2_t a)<span class="right">Floating-point reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpe-floating-point-reciprocal-estimate">FRECPE</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecipEstimate.2" title="function: bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)">FPRecipEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpes_f32" type="checkbox"><label for="vrecpes_f32"><div>float32_t <b><b>vrecpes_f32</b></b> (float32_t a)<span class="right">Floating-point reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpe-floating-point-reciprocal-estimate">FRECPE</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecipEstimate.2" title="function: bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)">FPRecipEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecped_f64" type="checkbox"><label for="vrecped_f64"><div>float64_t <b><b>vrecped_f64</b></b> (float64_t a)<span class="right">Floating-point reciprocal estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpe-floating-point-reciprocal-estimate">FRECPE</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecipEstimate.2" title="function: bits(N) FPRecipEstimate(bits(N) operand, FPCRType fpcr)">FPRecipEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecps_f32" type="checkbox"><label for="vrecps_f32"><div>float32x2_t <b><b>vrecps_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point reciprocal step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecps-floating-point-reciprocal-step">FRECPS</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRecipStepFused.2" title="function: bits(N) FPRecipStepFused(bits(N) op1, bits(N) op2)">FPRecipStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecpsq_f32" type="checkbox"><label for="vrecpsq_f32"><div>float32x4_t <b><b>vrecpsq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point reciprocal step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecps-floating-point-reciprocal-step">FRECPS</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRecipStepFused.2" title="function: bits(N) FPRecipStepFused(bits(N) op1, bits(N) op2)">FPRecipStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrecps_f64" type="checkbox"><label for="vrecps_f64"><div>float64x1_t <b><b>vrecps_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point reciprocal step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecps-floating-point-reciprocal-step">FRECPS</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRecipStepFused.2" title="function: bits(N) FPRecipStepFused(bits(N) op1, bits(N) op2)">FPRecipStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpsq_f64" type="checkbox"><label for="vrecpsq_f64"><div>float64x2_t <b><b>vrecpsq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point reciprocal step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecps-floating-point-reciprocal-step">FRECPS</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRecipStepFused.2" title="function: bits(N) FPRecipStepFused(bits(N) op1, bits(N) op2)">FPRecipStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpss_f32" type="checkbox"><label for="vrecpss_f32"><div>float32_t <b><b>vrecpss_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point reciprocal step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecps-floating-point-reciprocal-step">FRECPS</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRecipStepFused.2" title="function: bits(N) FPRecipStepFused(bits(N) op1, bits(N) op2)">FPRecipStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpsd_f64" type="checkbox"><label for="vrecpsd_f64"><div>float64_t <b><b>vrecpsd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point reciprocal step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecps-floating-point-reciprocal-step">FRECPS</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRecipStepFused.2" title="function: bits(N) FPRecipStepFused(bits(N) op1, bits(N) op2)">FPRecipStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqrt_f32" type="checkbox"><label for="vsqrt_f32"><div>float32x2_t <b><b>vsqrt_f32</b></b> (float32x2_t a)<span class="right">Floating-point square root</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsqrt-vector-floating-point-square-root-vector">FSQRT</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSqrt.2" title="function: bits(N) FPSqrt(bits(N) op, FPCRType fpcr)">FPSqrt</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqrtq_f32" type="checkbox"><label for="vsqrtq_f32"><div>float32x4_t <b><b>vsqrtq_f32</b></b> (float32x4_t a)<span class="right">Floating-point square root</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsqrt-vector-floating-point-square-root-vector">FSQRT</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSqrt.2" title="function: bits(N) FPSqrt(bits(N) op, FPCRType fpcr)">FPSqrt</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqrt_f64" type="checkbox"><label for="vsqrt_f64"><div>float64x1_t <b><b>vsqrt_f64</b></b> (float64x1_t a)<span class="right">Floating-point square root</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsqrt-vector-floating-point-square-root-vector">FSQRT</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSqrt.2" title="function: bits(N) FPSqrt(bits(N) op, FPCRType fpcr)">FPSqrt</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsqrtq_f64" type="checkbox"><label for="vsqrtq_f64"><div>float64x2_t <b><b>vsqrtq_f64</b></b> (float64x2_t a)<span class="right">Floating-point square root</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fsqrt-vector-floating-point-square-root-vector">FSQRT</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPSqrt.2" title="function: bits(N) FPSqrt(bits(N) op, FPCRType fpcr)">FPSqrt</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrte_u32" type="checkbox"><label for="vrsqrte_u32"><div>uint32x2_t <b><b>vrsqrte_u32</b></b> (uint32x2_t a)<span class="right">Unsigned reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&amp;FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursqrte-unsigned-reciprocal-square-root-estimate">URSQRTE</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 32];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedRSqrtEstimate.1" title="function: bits(N) UnsignedRSqrtEstimate(bits(N) operand)">UnsignedRSqrtEstimate</a>(element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrteq_u32" type="checkbox"><label for="vrsqrteq_u32"><div>uint32x4_t <b><b>vrsqrteq_u32</b></b> (uint32x4_t a)<span class="right">Unsigned reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&amp;FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ursqrte-unsigned-reciprocal-square-root-estimate">URSQRTE</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, 32];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UnsignedRSqrtEstimate.1" title="function: bits(N) UnsignedRSqrtEstimate(bits(N) operand)">UnsignedRSqrtEstimate</a>(element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrte_f32" type="checkbox"><label for="vrsqrte_f32"><div>float32x2_t <b><b>vrsqrte_f32</b></b> (float32x2_t a)<span class="right">Floating-point reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrte-floating-point-reciprocal-square-root-estimate">FRSQRTE</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRSqrtEstimate.2" title="function: bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)">FPRSqrtEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrteq_f32" type="checkbox"><label for="vrsqrteq_f32"><div>float32x4_t <b><b>vrsqrteq_f32</b></b> (float32x4_t a)<span class="right">Floating-point reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrte-floating-point-reciprocal-square-root-estimate">FRSQRTE</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRSqrtEstimate.2" title="function: bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)">FPRSqrtEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrte_f64" type="checkbox"><label for="vrsqrte_f64"><div>float64x1_t <b><b>vrsqrte_f64</b></b> (float64x1_t a)<span class="right">Floating-point reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrte-floating-point-reciprocal-square-root-estimate">FRSQRTE</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRSqrtEstimate.2" title="function: bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)">FPRSqrtEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrteq_f64" type="checkbox"><label for="vrsqrteq_f64"><div>float64x2_t <b><b>vrsqrteq_f64</b></b> (float64x2_t a)<span class="right">Floating-point reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrte-floating-point-reciprocal-square-root-estimate">FRSQRTE</a> Vd.2D,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRSqrtEstimate.2" title="function: bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)">FPRSqrtEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrtes_f32" type="checkbox"><label for="vrsqrtes_f32"><div>float32_t <b><b>vrsqrtes_f32</b></b> (float32_t a)<span class="right">Floating-point reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrte-floating-point-reciprocal-square-root-estimate">FRSQRTE</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRSqrtEstimate.2" title="function: bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)">FPRSqrtEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrted_f64" type="checkbox"><label for="vrsqrted_f64"><div>float64_t <b><b>vrsqrted_f64</b></b> (float64_t a)<span class="right">Floating-point reciprocal square root estimate</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrte-floating-point-reciprocal-square-root-estimate">FRSQRTE</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRSqrtEstimate.2" title="function: bits(N) FPRSqrtEstimate(bits(N) operand, FPCRType fpcr)">FPRSqrtEstimate</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrts_f32" type="checkbox"><label for="vrsqrts_f32"><div>float32x2_t <b><b>vrsqrts_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point reciprocal square root step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrts-floating-point-reciprocal-square-root-step">FRSQRTS</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRSqrtStepFused.2" title="function: bits(N) FPRSqrtStepFused(bits(N) op1, bits(N) op2)">FPRSqrtStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrtsq_f32" type="checkbox"><label for="vrsqrtsq_f32"><div>float32x4_t <b><b>vrsqrtsq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point reciprocal square root step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrts-floating-point-reciprocal-square-root-step">FRSQRTS</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRSqrtStepFused.2" title="function: bits(N) FPRSqrtStepFused(bits(N) op1, bits(N) op2)">FPRSqrtStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrts_f64" type="checkbox"><label for="vrsqrts_f64"><div>float64x1_t <b><b>vrsqrts_f64</b></b> (float64x1_t a, float64x1_t b)<span class="right">Floating-point reciprocal square root step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrts-floating-point-reciprocal-square-root-step">FRSQRTS</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRSqrtStepFused.2" title="function: bits(N) FPRSqrtStepFused(bits(N) op1, bits(N) op2)">FPRSqrtStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrtsq_f64" type="checkbox"><label for="vrsqrtsq_f64"><div>float64x2_t <b><b>vrsqrtsq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point reciprocal square root step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrts-floating-point-reciprocal-square-root-step">FRSQRTS</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRSqrtStepFused.2" title="function: bits(N) FPRSqrtStepFused(bits(N) op1, bits(N) op2)">FPRSqrtStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrtss_f32" type="checkbox"><label for="vrsqrtss_f32"><div>float32_t <b><b>vrsqrtss_f32</b></b> (float32_t a, float32_t b)<span class="right">Floating-point reciprocal square root step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrts-floating-point-reciprocal-square-root-step">FRSQRTS</a> Sd,Sn,Sm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn <br />
+b &rarr; Sm </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRSqrtStepFused.2" title="function: bits(N) FPRSqrtStepFused(bits(N) op1, bits(N) op2)">FPRSqrtStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrsqrtsd_f64" type="checkbox"><label for="vrsqrtsd_f64"><div>float64_t <b><b>vrsqrtsd_f64</b></b> (float64_t a, float64_t b)<span class="right">Floating-point reciprocal square root step</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&amp;FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frsqrts-floating-point-reciprocal-square-root-step">FRSQRTS</a> Dd,Dn,Dm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn <br />
+b &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.FPRSqrtStepFused.2" title="function: bits(N) FPRSqrtStepFused(bits(N) op1, bits(N) op2)">FPRSqrtStepFused</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_s8" type="checkbox"><label for="vmvn_s8"><div>int8x8_t <b><b>vmvn_s8</b></b> (int8x8_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_s8" type="checkbox"><label for="vmvnq_s8"><div>int8x16_t <b><b>vmvnq_s8</b></b> (int8x16_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_s16" type="checkbox"><label for="vmvn_s16"><div>int16x4_t <b><b>vmvn_s16</b></b> (int16x4_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_s16" type="checkbox"><label for="vmvnq_s16"><div>int16x8_t <b><b>vmvnq_s16</b></b> (int16x8_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_s32" type="checkbox"><label for="vmvn_s32"><div>int32x2_t <b><b>vmvn_s32</b></b> (int32x2_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_s32" type="checkbox"><label for="vmvnq_s32"><div>int32x4_t <b><b>vmvnq_s32</b></b> (int32x4_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_u8" type="checkbox"><label for="vmvn_u8"><div>uint8x8_t <b><b>vmvn_u8</b></b> (uint8x8_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_u8" type="checkbox"><label for="vmvnq_u8"><div>uint8x16_t <b><b>vmvnq_u8</b></b> (uint8x16_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_u16" type="checkbox"><label for="vmvn_u16"><div>uint16x4_t <b><b>vmvn_u16</b></b> (uint16x4_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_u16" type="checkbox"><label for="vmvnq_u16"><div>uint16x8_t <b><b>vmvnq_u16</b></b> (uint16x8_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_u32" type="checkbox"><label for="vmvn_u32"><div>uint32x2_t <b><b>vmvn_u32</b></b> (uint32x2_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_u32" type="checkbox"><label for="vmvnq_u32"><div>uint32x4_t <b><b>vmvnq_u32</b></b> (uint32x4_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvn_p8" type="checkbox"><label for="vmvn_p8"><div>poly8x8_t <b><b>vmvn_p8</b></b> (poly8x8_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmvnq_p8" type="checkbox"><label for="vmvnq_p8"><div>poly8x16_t <b><b>vmvnq_p8</b></b> (poly8x16_t a)<span class="right">Bitwise NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&amp;FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/mvn-bitwise-not-vector-an-alias-of-not">MVN</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4><p>The description of 
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-simd-and-floating-point-instructions-alphabetic-order/not-bitwise-not-vector">NOT</a> 
+        gives the operational pseudocode for this instruction.</p>      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_s8" type="checkbox"><label for="vand_s8"><div>int8x8_t <b><b>vand_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_s8" type="checkbox"><label for="vandq_s8"><div>int8x16_t <b><b>vandq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_s16" type="checkbox"><label for="vand_s16"><div>int16x4_t <b><b>vand_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_s16" type="checkbox"><label for="vandq_s16"><div>int16x8_t <b><b>vandq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_s32" type="checkbox"><label for="vand_s32"><div>int32x2_t <b><b>vand_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_s32" type="checkbox"><label for="vandq_s32"><div>int32x4_t <b><b>vandq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_s64" type="checkbox"><label for="vand_s64"><div>int64x1_t <b><b>vand_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_s64" type="checkbox"><label for="vandq_s64"><div>int64x2_t <b><b>vandq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_u8" type="checkbox"><label for="vand_u8"><div>uint8x8_t <b><b>vand_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_u8" type="checkbox"><label for="vandq_u8"><div>uint8x16_t <b><b>vandq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_u16" type="checkbox"><label for="vand_u16"><div>uint16x4_t <b><b>vand_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_u16" type="checkbox"><label for="vandq_u16"><div>uint16x8_t <b><b>vandq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_u32" type="checkbox"><label for="vand_u32"><div>uint32x2_t <b><b>vand_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_u32" type="checkbox"><label for="vandq_u32"><div>uint32x4_t <b><b>vandq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vand_u64" type="checkbox"><label for="vand_u64"><div>uint64x1_t <b><b>vand_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vandq_u64" type="checkbox"><label for="vandq_u64"><div>uint64x2_t <b><b>vandq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Bitwise AND</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/and-vector-bitwise-and-vector">AND</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_s8" type="checkbox"><label for="vorr_s8"><div>int8x8_t <b><b>vorr_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_s8" type="checkbox"><label for="vorrq_s8"><div>int8x16_t <b><b>vorrq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_s16" type="checkbox"><label for="vorr_s16"><div>int16x4_t <b><b>vorr_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_s16" type="checkbox"><label for="vorrq_s16"><div>int16x8_t <b><b>vorrq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_s32" type="checkbox"><label for="vorr_s32"><div>int32x2_t <b><b>vorr_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_s32" type="checkbox"><label for="vorrq_s32"><div>int32x4_t <b><b>vorrq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_s64" type="checkbox"><label for="vorr_s64"><div>int64x1_t <b><b>vorr_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_s64" type="checkbox"><label for="vorrq_s64"><div>int64x2_t <b><b>vorrq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_u8" type="checkbox"><label for="vorr_u8"><div>uint8x8_t <b><b>vorr_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_u8" type="checkbox"><label for="vorrq_u8"><div>uint8x16_t <b><b>vorrq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_u16" type="checkbox"><label for="vorr_u16"><div>uint16x4_t <b><b>vorr_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_u16" type="checkbox"><label for="vorrq_u16"><div>uint16x8_t <b><b>vorrq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_u32" type="checkbox"><label for="vorr_u32"><div>uint32x2_t <b><b>vorr_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_u32" type="checkbox"><label for="vorrq_u32"><div>uint32x4_t <b><b>vorrq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorr_u64" type="checkbox"><label for="vorr_u64"><div>uint64x1_t <b><b>vorr_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorrq_u64" type="checkbox"><label for="vorrq_u64"><div>uint64x2_t <b><b>vorrq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Bitwise inclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orr-vector-register-bitwise-inclusive-or-vector-register">ORR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_s8" type="checkbox"><label for="veor_s8"><div>int8x8_t <b><b>veor_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_s8" type="checkbox"><label for="veorq_s8"><div>int8x16_t <b><b>veorq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_s16" type="checkbox"><label for="veor_s16"><div>int16x4_t <b><b>veor_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_s16" type="checkbox"><label for="veorq_s16"><div>int16x8_t <b><b>veorq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_s32" type="checkbox"><label for="veor_s32"><div>int32x2_t <b><b>veor_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_s32" type="checkbox"><label for="veorq_s32"><div>int32x4_t <b><b>veorq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_s64" type="checkbox"><label for="veor_s64"><div>int64x1_t <b><b>veor_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_s64" type="checkbox"><label for="veorq_s64"><div>int64x2_t <b><b>veorq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_u8" type="checkbox"><label for="veor_u8"><div>uint8x8_t <b><b>veor_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_u8" type="checkbox"><label for="veorq_u8"><div>uint8x16_t <b><b>veorq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_u16" type="checkbox"><label for="veor_u16"><div>uint16x4_t <b><b>veor_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_u16" type="checkbox"><label for="veorq_u16"><div>uint16x8_t <b><b>veorq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_u32" type="checkbox"><label for="veor_u32"><div>uint32x2_t <b><b>veor_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_u32" type="checkbox"><label for="veorq_u32"><div>uint32x4_t <b><b>veorq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veor_u64" type="checkbox"><label for="veor_u64"><div>uint64x1_t <b><b>veor_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="veorq_u64" type="checkbox"><label for="veorq_u64"><div>uint64x2_t <b><b>veorq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Bitwise exclusive OR</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&amp;FP registers, and places the result in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/eor-vector-bitwise-exclusive-or-vector">EOR</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Ones.0" title="function: bits(N) Ones()">Ones</a>();
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_s8" type="checkbox"><label for="vbic_s8"><div>int8x8_t <b><b>vbic_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_s8" type="checkbox"><label for="vbicq_s8"><div>int8x16_t <b><b>vbicq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_s16" type="checkbox"><label for="vbic_s16"><div>int16x4_t <b><b>vbic_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_s16" type="checkbox"><label for="vbicq_s16"><div>int16x8_t <b><b>vbicq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_s32" type="checkbox"><label for="vbic_s32"><div>int32x2_t <b><b>vbic_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_s32" type="checkbox"><label for="vbicq_s32"><div>int32x4_t <b><b>vbicq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_s64" type="checkbox"><label for="vbic_s64"><div>int64x1_t <b><b>vbic_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_s64" type="checkbox"><label for="vbicq_s64"><div>int64x2_t <b><b>vbicq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_u8" type="checkbox"><label for="vbic_u8"><div>uint8x8_t <b><b>vbic_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_u8" type="checkbox"><label for="vbicq_u8"><div>uint8x16_t <b><b>vbicq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_u16" type="checkbox"><label for="vbic_u16"><div>uint16x4_t <b><b>vbic_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_u16" type="checkbox"><label for="vbicq_u16"><div>uint16x8_t <b><b>vbicq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_u32" type="checkbox"><label for="vbic_u32"><div>uint32x2_t <b><b>vbic_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_u32" type="checkbox"><label for="vbicq_u32"><div>uint32x4_t <b><b>vbicq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbic_u64" type="checkbox"><label for="vbic_u64"><div>uint64x1_t <b><b>vbic_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbicq_u64" type="checkbox"><label for="vbicq_u64"><div>uint64x2_t <b><b>vbicq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Bitwise bit clear</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&amp;FP register and the complement of the second source SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bic-vector-register-bitwise-bit-clear-vector-register">BIC</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_s8" type="checkbox"><label for="vorn_s8"><div>int8x8_t <b><b>vorn_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_s8" type="checkbox"><label for="vornq_s8"><div>int8x16_t <b><b>vornq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_s16" type="checkbox"><label for="vorn_s16"><div>int16x4_t <b><b>vorn_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_s16" type="checkbox"><label for="vornq_s16"><div>int16x8_t <b><b>vornq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_s32" type="checkbox"><label for="vorn_s32"><div>int32x2_t <b><b>vorn_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_s32" type="checkbox"><label for="vornq_s32"><div>int32x4_t <b><b>vornq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_s64" type="checkbox"><label for="vorn_s64"><div>int64x1_t <b><b>vorn_s64</b></b> (int64x1_t a, int64x1_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_s64" type="checkbox"><label for="vornq_s64"><div>int64x2_t <b><b>vornq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_u8" type="checkbox"><label for="vorn_u8"><div>uint8x8_t <b><b>vorn_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_u8" type="checkbox"><label for="vornq_u8"><div>uint8x16_t <b><b>vornq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_u16" type="checkbox"><label for="vorn_u16"><div>uint16x4_t <b><b>vorn_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_u16" type="checkbox"><label for="vornq_u16"><div>uint16x8_t <b><b>vornq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_u32" type="checkbox"><label for="vorn_u32"><div>uint32x2_t <b><b>vorn_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_u32" type="checkbox"><label for="vornq_u32"><div>uint32x4_t <b><b>vornq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vorn_u64" type="checkbox"><label for="vorn_u64"><div>uint64x1_t <b><b>vorn_u64</b></b> (uint64x1_t a, uint64x1_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vornq_u64" type="checkbox"><label for="vornq_u64"><div>uint64x2_t <b><b>vornq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Bitwise inclusive OR NOT</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&amp;FP registers, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/orn-vector-bitwise-inclusive-or-not-vector">ORN</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_s8" type="checkbox"><label for="vbsl_s8"><div>int8x8_t <b><b>vbsl_s8</b></b> (uint8x8_t a, int8x8_t b, int8x8_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_s8" type="checkbox"><label for="vbslq_s8"><div>int8x16_t <b><b>vbslq_s8</b></b> (uint8x16_t a, int8x16_t b, int8x16_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_s16" type="checkbox"><label for="vbsl_s16"><div>int16x4_t <b><b>vbsl_s16</b></b> (uint16x4_t a, int16x4_t b, int16x4_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_s16" type="checkbox"><label for="vbslq_s16"><div>int16x8_t <b><b>vbslq_s16</b></b> (uint16x8_t a, int16x8_t b, int16x8_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_s32" type="checkbox"><label for="vbsl_s32"><div>int32x2_t <b><b>vbsl_s32</b></b> (uint32x2_t a, int32x2_t b, int32x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_s32" type="checkbox"><label for="vbslq_s32"><div>int32x4_t <b><b>vbslq_s32</b></b> (uint32x4_t a, int32x4_t b, int32x4_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_s64" type="checkbox"><label for="vbsl_s64"><div>int64x1_t <b><b>vbsl_s64</b></b> (uint64x1_t a, int64x1_t b, int64x1_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_s64" type="checkbox"><label for="vbslq_s64"><div>int64x2_t <b><b>vbslq_s64</b></b> (uint64x2_t a, int64x2_t b, int64x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_u8" type="checkbox"><label for="vbsl_u8"><div>uint8x8_t <b><b>vbsl_u8</b></b> (uint8x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_u8" type="checkbox"><label for="vbslq_u8"><div>uint8x16_t <b><b>vbslq_u8</b></b> (uint8x16_t a, uint8x16_t b, uint8x16_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_u16" type="checkbox"><label for="vbsl_u16"><div>uint16x4_t <b><b>vbsl_u16</b></b> (uint16x4_t a, uint16x4_t b, uint16x4_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_u16" type="checkbox"><label for="vbslq_u16"><div>uint16x8_t <b><b>vbslq_u16</b></b> (uint16x8_t a, uint16x8_t b, uint16x8_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_u32" type="checkbox"><label for="vbsl_u32"><div>uint32x2_t <b><b>vbsl_u32</b></b> (uint32x2_t a, uint32x2_t b, uint32x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_u32" type="checkbox"><label for="vbslq_u32"><div>uint32x4_t <b><b>vbslq_u32</b></b> (uint32x4_t a, uint32x4_t b, uint32x4_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_u64" type="checkbox"><label for="vbsl_u64"><div>uint64x1_t <b><b>vbsl_u64</b></b> (uint64x1_t a, uint64x1_t b, uint64x1_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_u64" type="checkbox"><label for="vbslq_u64"><div>uint64x2_t <b><b>vbslq_u64</b></b> (uint64x2_t a, uint64x2_t b, uint64x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_p64" type="checkbox"><label for="vbsl_p64"><div>poly64x1_t <b><b>vbsl_p64</b></b> (poly64x1_t a, poly64x1_t b, poly64x1_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_p64" type="checkbox"><label for="vbslq_p64"><div>poly64x2_t <b><b>vbslq_p64</b></b> (poly64x2_t a, poly64x2_t b, poly64x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_f32" type="checkbox"><label for="vbsl_f32"><div>float32x2_t <b><b>vbsl_f32</b></b> (uint32x2_t a, float32x2_t b, float32x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_f32" type="checkbox"><label for="vbslq_f32"><div>float32x4_t <b><b>vbslq_f32</b></b> (uint32x4_t a, float32x4_t b, float32x4_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_p8" type="checkbox"><label for="vbsl_p8"><div>poly8x8_t <b><b>vbsl_p8</b></b> (uint8x8_t a, poly8x8_t b, poly8x8_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_p8" type="checkbox"><label for="vbslq_p8"><div>poly8x16_t <b><b>vbslq_p8</b></b> (uint8x16_t a, poly8x16_t b, poly8x16_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_p16" type="checkbox"><label for="vbsl_p16"><div>poly16x4_t <b><b>vbsl_p16</b></b> (uint16x4_t a, poly16x4_t b, poly16x4_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_p16" type="checkbox"><label for="vbslq_p16"><div>poly16x8_t <b><b>vbslq_p16</b></b> (uint16x8_t a, poly16x8_t b, poly16x8_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vbsl_f64" type="checkbox"><label for="vbsl_f64"><div>float64x1_t <b><b>vbsl_f64</b></b> (uint64x1_t a, float64x1_t b, float64x1_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+b &rarr; Vn.8B <br />
+c &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vbslq_f64" type="checkbox"><label for="vbslq_f64"><div>float64x2_t <b><b>vbslq_f64</b></b> (uint64x2_t a, float64x2_t b, float64x2_t c)<span class="right">Bitwise select</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Select. This instruction sets each bit in the destination SIMD&amp;FP register to the corresponding bit from the first source SIMD&amp;FP register when the original destination bit was 1, otherwise from the second source SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bsl-bitwise-select">BSL</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+b &rarr; Vn.16B <br />
+c &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_s8" type="checkbox"><label for="vcopy_lane_s8"><div>int8x8_t <b><b>vcopy_lane_s8</b></b> (int8x8_t a, const int lane1, int8x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.8B <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_s8" type="checkbox"><label for="vcopyq_lane_s8"><div>int8x16_t <b><b>vcopyq_lane_s8</b></b> (int8x16_t a, const int lane1, int8x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+0 &lt;= lane1 &lt;= 15 <br />
+b &rarr; Vn.8B <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_s16" type="checkbox"><label for="vcopy_lane_s16"><div>int16x4_t <b><b>vcopy_lane_s16</b></b> (int16x4_t a, const int lane1, int16x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.4H <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_s16" type="checkbox"><label for="vcopyq_lane_s16"><div>int16x8_t <b><b>vcopyq_lane_s16</b></b> (int16x8_t a, const int lane1, int16x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.4H <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_s32" type="checkbox"><label for="vcopy_lane_s32"><div>int32x2_t <b><b>vcopy_lane_s32</b></b> (int32x2_t a, const int lane1, int32x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2S <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_s32" type="checkbox"><label for="vcopyq_lane_s32"><div>int32x4_t <b><b>vcopyq_lane_s32</b></b> (int32x4_t a, const int lane1, int32x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.2S <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_s64" type="checkbox"><label for="vcopy_lane_s64"><div>int64x1_t <b><b>vcopy_lane_s64</b></b> (int64x1_t a, const int lane1, int64x1_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_s64" type="checkbox"><label for="vcopyq_lane_s64"><div>int64x2_t <b><b>vcopyq_lane_s64</b></b> (int64x2_t a, const int lane1, int64x1_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_u8" type="checkbox"><label for="vcopy_lane_u8"><div>uint8x8_t <b><b>vcopy_lane_u8</b></b> (uint8x8_t a, const int lane1, uint8x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.8B <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_u8" type="checkbox"><label for="vcopyq_lane_u8"><div>uint8x16_t <b><b>vcopyq_lane_u8</b></b> (uint8x16_t a, const int lane1, uint8x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+0 &lt;= lane1 &lt;= 15 <br />
+b &rarr; Vn.8B <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_u16" type="checkbox"><label for="vcopy_lane_u16"><div>uint16x4_t <b><b>vcopy_lane_u16</b></b> (uint16x4_t a, const int lane1, uint16x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.4H <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_u16" type="checkbox"><label for="vcopyq_lane_u16"><div>uint16x8_t <b><b>vcopyq_lane_u16</b></b> (uint16x8_t a, const int lane1, uint16x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.4H <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_u32" type="checkbox"><label for="vcopy_lane_u32"><div>uint32x2_t <b><b>vcopy_lane_u32</b></b> (uint32x2_t a, const int lane1, uint32x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2S <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_u32" type="checkbox"><label for="vcopyq_lane_u32"><div>uint32x4_t <b><b>vcopyq_lane_u32</b></b> (uint32x4_t a, const int lane1, uint32x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.2S <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_u64" type="checkbox"><label for="vcopy_lane_u64"><div>uint64x1_t <b><b>vcopy_lane_u64</b></b> (uint64x1_t a, const int lane1, uint64x1_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_u64" type="checkbox"><label for="vcopyq_lane_u64"><div>uint64x2_t <b><b>vcopyq_lane_u64</b></b> (uint64x2_t a, const int lane1, uint64x1_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_p64" type="checkbox"><label for="vcopy_lane_p64"><div>poly64x1_t <b><b>vcopy_lane_p64</b></b> (poly64x1_t a, const int lane1, poly64x1_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_p64" type="checkbox"><label for="vcopyq_lane_p64"><div>poly64x2_t <b><b>vcopyq_lane_p64</b></b> (poly64x2_t a, const int lane1, poly64x1_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_f32" type="checkbox"><label for="vcopy_lane_f32"><div>float32x2_t <b><b>vcopy_lane_f32</b></b> (float32x2_t a, const int lane1, float32x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2S <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_f32" type="checkbox"><label for="vcopyq_lane_f32"><div>float32x4_t <b><b>vcopyq_lane_f32</b></b> (float32x4_t a, const int lane1, float32x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.2S <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_f64" type="checkbox"><label for="vcopy_lane_f64"><div>float64x1_t <b><b>vcopy_lane_f64</b></b> (float64x1_t a, const int lane1, float64x1_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_f64" type="checkbox"><label for="vcopyq_lane_f64"><div>float64x2_t <b><b>vcopyq_lane_f64</b></b> (float64x2_t a, const int lane1, float64x1_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.1D <br />
+0 &lt;= lane2 &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_p8" type="checkbox"><label for="vcopy_lane_p8"><div>poly8x8_t <b><b>vcopy_lane_p8</b></b> (poly8x8_t a, const int lane1, poly8x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.8B <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_p8" type="checkbox"><label for="vcopyq_lane_p8"><div>poly8x16_t <b><b>vcopyq_lane_p8</b></b> (poly8x16_t a, const int lane1, poly8x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+0 &lt;= lane1 &lt;= 15 <br />
+b &rarr; Vn.8B <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_lane_p16" type="checkbox"><label for="vcopy_lane_p16"><div>poly16x4_t <b><b>vcopy_lane_p16</b></b> (poly16x4_t a, const int lane1, poly16x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.4H <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_lane_p16" type="checkbox"><label for="vcopyq_lane_p16"><div>poly16x8_t <b><b>vcopyq_lane_p16</b></b> (poly16x8_t a, const int lane1, poly16x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.4H <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_s8" type="checkbox"><label for="vcopy_laneq_s8"><div>int8x8_t <b><b>vcopy_laneq_s8</b></b> (int8x8_t a, const int lane1, int8x16_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.16B <br />
+0 &lt;= lane2 &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_s8" type="checkbox"><label for="vcopyq_laneq_s8"><div>int8x16_t <b><b>vcopyq_laneq_s8</b></b> (int8x16_t a, const int lane1, int8x16_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+0 &lt;= lane1 &lt;= 15 <br />
+b &rarr; Vn.16B <br />
+0 &lt;= lane2 &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_s16" type="checkbox"><label for="vcopy_laneq_s16"><div>int16x4_t <b><b>vcopy_laneq_s16</b></b> (int16x4_t a, const int lane1, int16x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.8H <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_s16" type="checkbox"><label for="vcopyq_laneq_s16"><div>int16x8_t <b><b>vcopyq_laneq_s16</b></b> (int16x8_t a, const int lane1, int16x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.8H <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_s32" type="checkbox"><label for="vcopy_laneq_s32"><div>int32x2_t <b><b>vcopy_laneq_s32</b></b> (int32x2_t a, const int lane1, int32x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.4S <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_s32" type="checkbox"><label for="vcopyq_laneq_s32"><div>int32x4_t <b><b>vcopyq_laneq_s32</b></b> (int32x4_t a, const int lane1, int32x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.4S <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_s64" type="checkbox"><label for="vcopy_laneq_s64"><div>int64x1_t <b><b>vcopy_laneq_s64</b></b> (int64x1_t a, const int lane1, int64x2_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_s64" type="checkbox"><label for="vcopyq_laneq_s64"><div>int64x2_t <b><b>vcopyq_laneq_s64</b></b> (int64x2_t a, const int lane1, int64x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_u8" type="checkbox"><label for="vcopy_laneq_u8"><div>uint8x8_t <b><b>vcopy_laneq_u8</b></b> (uint8x8_t a, const int lane1, uint8x16_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.16B <br />
+0 &lt;= lane2 &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_u8" type="checkbox"><label for="vcopyq_laneq_u8"><div>uint8x16_t <b><b>vcopyq_laneq_u8</b></b> (uint8x16_t a, const int lane1, uint8x16_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+0 &lt;= lane1 &lt;= 15 <br />
+b &rarr; Vn.16B <br />
+0 &lt;= lane2 &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_u16" type="checkbox"><label for="vcopy_laneq_u16"><div>uint16x4_t <b><b>vcopy_laneq_u16</b></b> (uint16x4_t a, const int lane1, uint16x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.8H <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_u16" type="checkbox"><label for="vcopyq_laneq_u16"><div>uint16x8_t <b><b>vcopyq_laneq_u16</b></b> (uint16x8_t a, const int lane1, uint16x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.8H <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_u32" type="checkbox"><label for="vcopy_laneq_u32"><div>uint32x2_t <b><b>vcopy_laneq_u32</b></b> (uint32x2_t a, const int lane1, uint32x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.4S <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_u32" type="checkbox"><label for="vcopyq_laneq_u32"><div>uint32x4_t <b><b>vcopyq_laneq_u32</b></b> (uint32x4_t a, const int lane1, uint32x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.4S <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_u64" type="checkbox"><label for="vcopy_laneq_u64"><div>uint64x1_t <b><b>vcopy_laneq_u64</b></b> (uint64x1_t a, const int lane1, uint64x2_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_u64" type="checkbox"><label for="vcopyq_laneq_u64"><div>uint64x2_t <b><b>vcopyq_laneq_u64</b></b> (uint64x2_t a, const int lane1, uint64x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_p64" type="checkbox"><label for="vcopy_laneq_p64"><div>poly64x1_t <b><b>vcopy_laneq_p64</b></b> (poly64x1_t a, const int lane1, poly64x2_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_p64" type="checkbox"><label for="vcopyq_laneq_p64"><div>poly64x2_t <b><b>vcopyq_laneq_p64</b></b> (poly64x2_t a, const int lane1, poly64x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_f32" type="checkbox"><label for="vcopy_laneq_f32"><div>float32x2_t <b><b>vcopy_laneq_f32</b></b> (float32x2_t a, const int lane1, float32x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.4S <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_f32" type="checkbox"><label for="vcopyq_laneq_f32"><div>float32x4_t <b><b>vcopyq_laneq_f32</b></b> (float32x4_t a, const int lane1, float32x4_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane1],Vn.S[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.4S <br />
+0 &lt;= lane2 &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_f64" type="checkbox"><label for="vcopy_laneq_f64"><div>float64x1_t <b><b>vcopy_laneq_f64</b></b> (float64x1_t a, const int lane1, float64x2_t b, const int lane2)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; UNUSED <br />
+0 &lt;= lane1 &lt;= 0 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_f64" type="checkbox"><label for="vcopyq_laneq_f64"><div>float64x2_t <b><b>vcopyq_laneq_f64</b></b> (float64x2_t a, const int lane1, float64x2_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane1],Vn.D[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+0 &lt;= lane1 &lt;= 1 <br />
+b &rarr; Vn.2D <br />
+0 &lt;= lane2 &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_p8" type="checkbox"><label for="vcopy_laneq_p8"><div>poly8x8_t <b><b>vcopy_laneq_p8</b></b> (poly8x8_t a, const int lane1, poly8x16_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.16B <br />
+0 &lt;= lane2 &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_p8" type="checkbox"><label for="vcopyq_laneq_p8"><div>poly8x16_t <b><b>vcopyq_laneq_p8</b></b> (poly8x16_t a, const int lane1, poly8x16_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane1],Vn.B[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+0 &lt;= lane1 &lt;= 15 <br />
+b &rarr; Vn.16B <br />
+0 &lt;= lane2 &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopy_laneq_p16" type="checkbox"><label for="vcopy_laneq_p16"><div>poly16x4_t <b><b>vcopy_laneq_p16</b></b> (poly16x4_t a, const int lane1, poly16x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+0 &lt;= lane1 &lt;= 3 <br />
+b &rarr; Vn.8H <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcopyq_laneq_p16" type="checkbox"><label for="vcopyq_laneq_p16"><div>poly16x8_t <b><b>vcopyq_laneq_p16</b></b> (poly16x8_t a, const int lane1, poly16x8_t b, const int lane2)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane1],Vn.H[lane2]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+0 &lt;= lane1 &lt;= 7 <br />
+b &rarr; Vn.8H <br />
+0 &lt;= lane2 &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrbit_s8" type="checkbox"><label for="vrbit_s8"><div>int8x8_t <b><b>vrbit_s8</b></b> (int8x8_t a)<span class="right">Reverse bit order</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&amp;FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rbit-vector-reverse-bit-order-vector">RBIT</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    for i = 0 to esize-1
+        rev&lt;esize-1-i&gt; = element&lt;i&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = rev;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrbitq_s8" type="checkbox"><label for="vrbitq_s8"><div>int8x16_t <b><b>vrbitq_s8</b></b> (int8x16_t a)<span class="right">Reverse bit order</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&amp;FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rbit-vector-reverse-bit-order-vector">RBIT</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    for i = 0 to esize-1
+        rev&lt;esize-1-i&gt; = element&lt;i&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = rev;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrbit_u8" type="checkbox"><label for="vrbit_u8"><div>uint8x8_t <b><b>vrbit_u8</b></b> (uint8x8_t a)<span class="right">Reverse bit order</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&amp;FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rbit-vector-reverse-bit-order-vector">RBIT</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    for i = 0 to esize-1
+        rev&lt;esize-1-i&gt; = element&lt;i&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = rev;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrbitq_u8" type="checkbox"><label for="vrbitq_u8"><div>uint8x16_t <b><b>vrbitq_u8</b></b> (uint8x16_t a)<span class="right">Reverse bit order</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&amp;FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rbit-vector-reverse-bit-order-vector">RBIT</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    for i = 0 to esize-1
+        rev&lt;esize-1-i&gt; = element&lt;i&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = rev;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrbit_p8" type="checkbox"><label for="vrbit_p8"><div>poly8x8_t <b><b>vrbit_p8</b></b> (poly8x8_t a)<span class="right">Reverse bit order</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&amp;FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rbit-vector-reverse-bit-order-vector">RBIT</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    for i = 0 to esize-1
+        rev&lt;esize-1-i&gt; = element&lt;i&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = rev;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrbitq_p8" type="checkbox"><label for="vrbitq_p8"><div>poly8x16_t <b><b>vrbitq_p8</b></b> (poly8x16_t a)<span class="right">Reverse bit order</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&amp;FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rbit-vector-reverse-bit-order-vector">RBIT</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    for i = 0 to esize-1
+        rev&lt;esize-1-i&gt; = element&lt;i&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = rev;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_s8" type="checkbox"><label for="vcreate_s8"><div>int8x8_t <b><b>vcreate_s8</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_s16" type="checkbox"><label for="vcreate_s16"><div>int16x4_t <b><b>vcreate_s16</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_s32" type="checkbox"><label for="vcreate_s32"><div>int32x2_t <b><b>vcreate_s32</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_s64" type="checkbox"><label for="vcreate_s64"><div>int64x1_t <b><b>vcreate_s64</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_u8" type="checkbox"><label for="vcreate_u8"><div>uint8x8_t <b><b>vcreate_u8</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_u16" type="checkbox"><label for="vcreate_u16"><div>uint16x4_t <b><b>vcreate_u16</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_u32" type="checkbox"><label for="vcreate_u32"><div>uint32x2_t <b><b>vcreate_u32</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_u64" type="checkbox"><label for="vcreate_u64"><div>uint64x1_t <b><b>vcreate_u64</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_p64" type="checkbox"><label for="vcreate_p64"><div>poly64x1_t <b><b>vcreate_p64</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_f16" type="checkbox"><label for="vcreate_f16"><div>float16x4_t <b><b>vcreate_f16</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_f32" type="checkbox"><label for="vcreate_f32"><div>float32x2_t <b><b>vcreate_f32</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_p8" type="checkbox"><label for="vcreate_p8"><div>poly8x8_t <b><b>vcreate_p8</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_p16" type="checkbox"><label for="vcreate_p16"><div>poly16x4_t <b><b>vcreate_p16</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcreate_f64" type="checkbox"><label for="vcreate_f64"><div>float64x1_t <b><b>vcreate_f64</b></b> (uint64_t a)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[0],Xn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_s8" type="checkbox"><label for="vdup_n_s8"><div>int8x8_t <b><b>vdup_n_s8</b></b> (int8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_s8" type="checkbox"><label for="vdupq_n_s8"><div>int8x16_t <b><b>vdupq_n_s8</b></b> (int8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_s16" type="checkbox"><label for="vdup_n_s16"><div>int16x4_t <b><b>vdup_n_s16</b></b> (int16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_s16" type="checkbox"><label for="vdupq_n_s16"><div>int16x8_t <b><b>vdupq_n_s16</b></b> (int16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_s32" type="checkbox"><label for="vdup_n_s32"><div>int32x2_t <b><b>vdup_n_s32</b></b> (int32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_s32" type="checkbox"><label for="vdupq_n_s32"><div>int32x4_t <b><b>vdupq_n_s32</b></b> (int32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_s64" type="checkbox"><label for="vdup_n_s64"><div>int64x1_t <b><b>vdup_n_s64</b></b> (int64_t value)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Dd.D[0],xn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_s64" type="checkbox"><label for="vdupq_n_s64"><div>int64x2_t <b><b>vdupq_n_s64</b></b> (int64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_u8" type="checkbox"><label for="vdup_n_u8"><div>uint8x8_t <b><b>vdup_n_u8</b></b> (uint8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_u8" type="checkbox"><label for="vdupq_n_u8"><div>uint8x16_t <b><b>vdupq_n_u8</b></b> (uint8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_u16" type="checkbox"><label for="vdup_n_u16"><div>uint16x4_t <b><b>vdup_n_u16</b></b> (uint16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_u16" type="checkbox"><label for="vdupq_n_u16"><div>uint16x8_t <b><b>vdupq_n_u16</b></b> (uint16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_u32" type="checkbox"><label for="vdup_n_u32"><div>uint32x2_t <b><b>vdup_n_u32</b></b> (uint32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_u32" type="checkbox"><label for="vdupq_n_u32"><div>uint32x4_t <b><b>vdupq_n_u32</b></b> (uint32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_u64" type="checkbox"><label for="vdup_n_u64"><div>uint64x1_t <b><b>vdup_n_u64</b></b> (uint64_t value)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Dd.D[0],xn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_u64" type="checkbox"><label for="vdupq_n_u64"><div>uint64x2_t <b><b>vdupq_n_u64</b></b> (uint64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_p64" type="checkbox"><label for="vdup_n_p64"><div>poly64x1_t <b><b>vdup_n_p64</b></b> (poly64_t value)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Dd.D[0],xn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_p64" type="checkbox"><label for="vdupq_n_p64"><div>poly64x2_t <b><b>vdupq_n_p64</b></b> (poly64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_f32" type="checkbox"><label for="vdup_n_f32"><div>float32x2_t <b><b>vdup_n_f32</b></b> (float32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_f32" type="checkbox"><label for="vdupq_n_f32"><div>float32x4_t <b><b>vdupq_n_f32</b></b> (float32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_p8" type="checkbox"><label for="vdup_n_p8"><div>poly8x8_t <b><b>vdup_n_p8</b></b> (poly8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_p8" type="checkbox"><label for="vdupq_n_p8"><div>poly8x16_t <b><b>vdupq_n_p8</b></b> (poly8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_p16" type="checkbox"><label for="vdup_n_p16"><div>poly16x4_t <b><b>vdup_n_p16</b></b> (poly16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_p16" type="checkbox"><label for="vdupq_n_p16"><div>poly16x8_t <b><b>vdupq_n_p16</b></b> (poly16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_n_f64" type="checkbox"><label for="vdup_n_f64"><div>float64x1_t <b><b>vdup_n_f64</b></b> (float64_t value)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Dd.D[0],xn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; xn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_n_f64" type="checkbox"><label for="vdupq_n_f64"><div>float64x2_t <b><b>vdupq_n_f64</b></b> (float64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_s8" type="checkbox"><label for="vmov_n_s8"><div>int8x8_t <b><b>vmov_n_s8</b></b> (int8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_s8" type="checkbox"><label for="vmovq_n_s8"><div>int8x16_t <b><b>vmovq_n_s8</b></b> (int8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_s16" type="checkbox"><label for="vmov_n_s16"><div>int16x4_t <b><b>vmov_n_s16</b></b> (int16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_s16" type="checkbox"><label for="vmovq_n_s16"><div>int16x8_t <b><b>vmovq_n_s16</b></b> (int16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_s32" type="checkbox"><label for="vmov_n_s32"><div>int32x2_t <b><b>vmov_n_s32</b></b> (int32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_s32" type="checkbox"><label for="vmovq_n_s32"><div>int32x4_t <b><b>vmovq_n_s32</b></b> (int32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_s64" type="checkbox"><label for="vmov_n_s64"><div>int64x1_t <b><b>vmov_n_s64</b></b> (int64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_s64" type="checkbox"><label for="vmovq_n_s64"><div>int64x2_t <b><b>vmovq_n_s64</b></b> (int64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_u8" type="checkbox"><label for="vmov_n_u8"><div>uint8x8_t <b><b>vmov_n_u8</b></b> (uint8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_u8" type="checkbox"><label for="vmovq_n_u8"><div>uint8x16_t <b><b>vmovq_n_u8</b></b> (uint8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_u16" type="checkbox"><label for="vmov_n_u16"><div>uint16x4_t <b><b>vmov_n_u16</b></b> (uint16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_u16" type="checkbox"><label for="vmovq_n_u16"><div>uint16x8_t <b><b>vmovq_n_u16</b></b> (uint16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_u32" type="checkbox"><label for="vmov_n_u32"><div>uint32x2_t <b><b>vmov_n_u32</b></b> (uint32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_u32" type="checkbox"><label for="vmovq_n_u32"><div>uint32x4_t <b><b>vmovq_n_u32</b></b> (uint32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_u64" type="checkbox"><label for="vmov_n_u64"><div>uint64x1_t <b><b>vmov_n_u64</b></b> (uint64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_u64" type="checkbox"><label for="vmovq_n_u64"><div>uint64x2_t <b><b>vmovq_n_u64</b></b> (uint64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_f32" type="checkbox"><label for="vmov_n_f32"><div>float32x2_t <b><b>vmov_n_f32</b></b> (float32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_f32" type="checkbox"><label for="vmovq_n_f32"><div>float32x4_t <b><b>vmovq_n_f32</b></b> (float32_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_p8" type="checkbox"><label for="vmov_n_p8"><div>poly8x8_t <b><b>vmov_n_p8</b></b> (poly8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_p8" type="checkbox"><label for="vmovq_n_p8"><div>poly8x16_t <b><b>vmovq_n_p8</b></b> (poly8_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_p16" type="checkbox"><label for="vmov_n_p16"><div>poly16x4_t <b><b>vmov_n_p16</b></b> (poly16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_p16" type="checkbox"><label for="vmovq_n_p16"><div>poly16x8_t <b><b>vmovq_n_p16</b></b> (poly16_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmov_n_f64" type="checkbox"><label for="vmov_n_f64"><div>float64x1_t <b><b>vmov_n_f64</b></b> (float64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmovq_n_f64" type="checkbox"><label for="vmovq_n_f64"><div>float64x2_t <b><b>vmovq_n_f64</b></b> (float64_t value)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,rn
+</pre>      <h4>Argument Preparation</h4><pre>value &rarr; rn </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_s8" type="checkbox"><label for="vdup_lane_s8"><div>int8x8_t <b><b>vdup_lane_s8</b></b> (int8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_s8" type="checkbox"><label for="vdupq_lane_s8"><div>int8x16_t <b><b>vdupq_lane_s8</b></b> (int8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_s16" type="checkbox"><label for="vdup_lane_s16"><div>int16x4_t <b><b>vdup_lane_s16</b></b> (int16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_s16" type="checkbox"><label for="vdupq_lane_s16"><div>int16x8_t <b><b>vdupq_lane_s16</b></b> (int16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_s32" type="checkbox"><label for="vdup_lane_s32"><div>int32x2_t <b><b>vdup_lane_s32</b></b> (int32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_s32" type="checkbox"><label for="vdupq_lane_s32"><div>int32x4_t <b><b>vdupq_lane_s32</b></b> (int32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_s64" type="checkbox"><label for="vdup_lane_s64"><div>int64x1_t <b><b>vdup_lane_s64</b></b> (int64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_s64" type="checkbox"><label for="vdupq_lane_s64"><div>int64x2_t <b><b>vdupq_lane_s64</b></b> (int64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_u8" type="checkbox"><label for="vdup_lane_u8"><div>uint8x8_t <b><b>vdup_lane_u8</b></b> (uint8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_u8" type="checkbox"><label for="vdupq_lane_u8"><div>uint8x16_t <b><b>vdupq_lane_u8</b></b> (uint8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_u16" type="checkbox"><label for="vdup_lane_u16"><div>uint16x4_t <b><b>vdup_lane_u16</b></b> (uint16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_u16" type="checkbox"><label for="vdupq_lane_u16"><div>uint16x8_t <b><b>vdupq_lane_u16</b></b> (uint16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_u32" type="checkbox"><label for="vdup_lane_u32"><div>uint32x2_t <b><b>vdup_lane_u32</b></b> (uint32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_u32" type="checkbox"><label for="vdupq_lane_u32"><div>uint32x4_t <b><b>vdupq_lane_u32</b></b> (uint32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_u64" type="checkbox"><label for="vdup_lane_u64"><div>uint64x1_t <b><b>vdup_lane_u64</b></b> (uint64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_u64" type="checkbox"><label for="vdupq_lane_u64"><div>uint64x2_t <b><b>vdupq_lane_u64</b></b> (uint64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_p64" type="checkbox"><label for="vdup_lane_p64"><div>poly64x1_t <b><b>vdup_lane_p64</b></b> (poly64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_p64" type="checkbox"><label for="vdupq_lane_p64"><div>poly64x2_t <b><b>vdupq_lane_p64</b></b> (poly64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_f32" type="checkbox"><label for="vdup_lane_f32"><div>float32x2_t <b><b>vdup_lane_f32</b></b> (float32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_f32" type="checkbox"><label for="vdupq_lane_f32"><div>float32x4_t <b><b>vdupq_lane_f32</b></b> (float32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_p8" type="checkbox"><label for="vdup_lane_p8"><div>poly8x8_t <b><b>vdup_lane_p8</b></b> (poly8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_p8" type="checkbox"><label for="vdupq_lane_p8"><div>poly8x16_t <b><b>vdupq_lane_p8</b></b> (poly8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_p16" type="checkbox"><label for="vdup_lane_p16"><div>poly16x4_t <b><b>vdup_lane_p16</b></b> (poly16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_p16" type="checkbox"><label for="vdupq_lane_p16"><div>poly16x8_t <b><b>vdupq_lane_p16</b></b> (poly16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_lane_f64" type="checkbox"><label for="vdup_lane_f64"><div>float64x1_t <b><b>vdup_lane_f64</b></b> (float64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_lane_f64" type="checkbox"><label for="vdupq_lane_f64"><div>float64x2_t <b><b>vdupq_lane_f64</b></b> (float64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_s8" type="checkbox"><label for="vdup_laneq_s8"><div>int8x8_t <b><b>vdup_laneq_s8</b></b> (int8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_s8" type="checkbox"><label for="vdupq_laneq_s8"><div>int8x16_t <b><b>vdupq_laneq_s8</b></b> (int8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_s16" type="checkbox"><label for="vdup_laneq_s16"><div>int16x4_t <b><b>vdup_laneq_s16</b></b> (int16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_s16" type="checkbox"><label for="vdupq_laneq_s16"><div>int16x8_t <b><b>vdupq_laneq_s16</b></b> (int16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_s32" type="checkbox"><label for="vdup_laneq_s32"><div>int32x2_t <b><b>vdup_laneq_s32</b></b> (int32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_s32" type="checkbox"><label for="vdupq_laneq_s32"><div>int32x4_t <b><b>vdupq_laneq_s32</b></b> (int32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_s64" type="checkbox"><label for="vdup_laneq_s64"><div>int64x1_t <b><b>vdup_laneq_s64</b></b> (int64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_s64" type="checkbox"><label for="vdupq_laneq_s64"><div>int64x2_t <b><b>vdupq_laneq_s64</b></b> (int64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_u8" type="checkbox"><label for="vdup_laneq_u8"><div>uint8x8_t <b><b>vdup_laneq_u8</b></b> (uint8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_u8" type="checkbox"><label for="vdupq_laneq_u8"><div>uint8x16_t <b><b>vdupq_laneq_u8</b></b> (uint8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_u16" type="checkbox"><label for="vdup_laneq_u16"><div>uint16x4_t <b><b>vdup_laneq_u16</b></b> (uint16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_u16" type="checkbox"><label for="vdupq_laneq_u16"><div>uint16x8_t <b><b>vdupq_laneq_u16</b></b> (uint16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_u32" type="checkbox"><label for="vdup_laneq_u32"><div>uint32x2_t <b><b>vdup_laneq_u32</b></b> (uint32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_u32" type="checkbox"><label for="vdupq_laneq_u32"><div>uint32x4_t <b><b>vdupq_laneq_u32</b></b> (uint32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_u64" type="checkbox"><label for="vdup_laneq_u64"><div>uint64x1_t <b><b>vdup_laneq_u64</b></b> (uint64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_u64" type="checkbox"><label for="vdupq_laneq_u64"><div>uint64x2_t <b><b>vdupq_laneq_u64</b></b> (uint64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_p64" type="checkbox"><label for="vdup_laneq_p64"><div>poly64x1_t <b><b>vdup_laneq_p64</b></b> (poly64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_p64" type="checkbox"><label for="vdupq_laneq_p64"><div>poly64x2_t <b><b>vdupq_laneq_p64</b></b> (poly64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_f32" type="checkbox"><label for="vdup_laneq_f32"><div>float32x2_t <b><b>vdup_laneq_f32</b></b> (float32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_f32" type="checkbox"><label for="vdupq_laneq_f32"><div>float32x4_t <b><b>vdupq_laneq_f32</b></b> (float32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4S,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_p8" type="checkbox"><label for="vdup_laneq_p8"><div>poly8x8_t <b><b>vdup_laneq_p8</b></b> (poly8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_p8" type="checkbox"><label for="vdupq_laneq_p8"><div>poly8x16_t <b><b>vdupq_laneq_p8</b></b> (poly8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.16B,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_p16" type="checkbox"><label for="vdup_laneq_p16"><div>poly16x4_t <b><b>vdup_laneq_p16</b></b> (poly16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.4H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_p16" type="checkbox"><label for="vdupq_laneq_p16"><div>poly16x8_t <b><b>vdupq_laneq_p16</b></b> (poly16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.8H,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdup_laneq_f64" type="checkbox"><label for="vdup_laneq_f64"><div>float64x1_t <b><b>vdup_laneq_f64</b></b> (float64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupq_laneq_f64" type="checkbox"><label for="vdupq_laneq_f64"><div>float64x2_t <b><b>vdupq_laneq_f64</b></b> (float64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.2D,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_s8" type="checkbox"><label for="vcombine_s8"><div>int8x16_t <b><b>vcombine_s8</b></b> (int8x8_t low, int8x8_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.8B <br />
+high &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_s16" type="checkbox"><label for="vcombine_s16"><div>int16x8_t <b><b>vcombine_s16</b></b> (int16x4_t low, int16x4_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.4H <br />
+high &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_s32" type="checkbox"><label for="vcombine_s32"><div>int32x4_t <b><b>vcombine_s32</b></b> (int32x2_t low, int32x2_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.2S <br />
+high &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_s64" type="checkbox"><label for="vcombine_s64"><div>int64x2_t <b><b>vcombine_s64</b></b> (int64x1_t low, int64x1_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.1D <br />
+high &rarr; Vm.1D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_u8" type="checkbox"><label for="vcombine_u8"><div>uint8x16_t <b><b>vcombine_u8</b></b> (uint8x8_t low, uint8x8_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.8B <br />
+high &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_u16" type="checkbox"><label for="vcombine_u16"><div>uint16x8_t <b><b>vcombine_u16</b></b> (uint16x4_t low, uint16x4_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.4H <br />
+high &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_u32" type="checkbox"><label for="vcombine_u32"><div>uint32x4_t <b><b>vcombine_u32</b></b> (uint32x2_t low, uint32x2_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.2S <br />
+high &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_u64" type="checkbox"><label for="vcombine_u64"><div>uint64x2_t <b>vcombine_u64</b> (uint64x1_t low, uint64x1_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.1D <br />
+high &rarr; Vm.1D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_p64" type="checkbox"><label for="vcombine_p64"><div>poly64x2_t <b>vcombine_p64</b> (poly64x1_t low, poly64x1_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.1D <br />
+high &rarr; Vm.1D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_f16" type="checkbox"><label for="vcombine_f16"><div>float16x8_t <b>vcombine_f16</b> (float16x4_t low, float16x4_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.4H <br />
+high &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_f32" type="checkbox"><label for="vcombine_f32"><div>float32x4_t <b>vcombine_f32</b> (float32x2_t low, float32x2_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.2S <br />
+high &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_p8" type="checkbox"><label for="vcombine_p8"><div>poly8x16_t <b>vcombine_p8</b> (poly8x8_t low, poly8x8_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.8B <br />
+high &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_p16" type="checkbox"><label for="vcombine_p16"><div>poly16x8_t <b>vcombine_p16</b> (poly16x4_t low, poly16x4_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.4H <br />
+high &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vcombine_f64" type="checkbox"><label for="vcombine_f64"><div>float64x2_t <b>vcombine_f64</b> (float64x1_t low, float64x1_t high)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[1],Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>low &rarr; Vn.1D <br />
+high &rarr; Vm.1D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_s8" type="checkbox"><label for="vget_high_s8"><div>int8x8_t <b>vget_high_s8</b> (int8x16_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_s16" type="checkbox"><label for="vget_high_s16"><div>int16x4_t <b>vget_high_s16</b> (int16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_s32" type="checkbox"><label for="vget_high_s32"><div>int32x2_t <b>vget_high_s32</b> (int32x4_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_s64" type="checkbox"><label for="vget_high_s64"><div>int64x1_t <b>vget_high_s64</b> (int64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_u8" type="checkbox"><label for="vget_high_u8"><div>uint8x8_t <b>vget_high_u8</b> (uint8x16_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_u16" type="checkbox"><label for="vget_high_u16"><div>uint16x4_t <b>vget_high_u16</b> (uint16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_u32" type="checkbox"><label for="vget_high_u32"><div>uint32x2_t <b>vget_high_u32</b> (uint32x4_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_u64" type="checkbox"><label for="vget_high_u64"><div>uint64x1_t <b><b>vget_high_u64</b></b> (uint64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_p64" type="checkbox"><label for="vget_high_p64"><div>poly64x1_t <b><b>vget_high_p64</b></b> (poly64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_f16" type="checkbox"><label for="vget_high_f16"><div>float16x4_t <b><b>vget_high_f16</b></b> (float16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_f32" type="checkbox"><label for="vget_high_f32"><div>float32x2_t <b><b>vget_high_f32</b></b> (float32x4_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_p8" type="checkbox"><label for="vget_high_p8"><div>poly8x8_t <b><b>vget_high_p8</b></b> (poly8x16_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_p16" type="checkbox"><label for="vget_high_p16"><div>poly16x4_t <b><b>vget_high_p16</b></b> (poly16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_high_f64" type="checkbox"><label for="vget_high_f64"><div>float64x1_t <b><b>vget_high_f64</b></b> (float64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[1]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_s8" type="checkbox"><label for="vget_low_s8"><div>int8x8_t <b><b>vget_low_s8</b></b> (int8x16_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_s16" type="checkbox"><label for="vget_low_s16"><div>int16x4_t <b><b>vget_low_s16</b></b> (int16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_s32" type="checkbox"><label for="vget_low_s32"><div>int32x2_t <b><b>vget_low_s32</b></b> (int32x4_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_s64" type="checkbox"><label for="vget_low_s64"><div>int64x1_t <b><b>vget_low_s64</b></b> (int64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_u8" type="checkbox"><label for="vget_low_u8"><div>uint8x8_t <b><b>vget_low_u8</b></b> (uint8x16_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_u16" type="checkbox"><label for="vget_low_u16"><div>uint16x4_t <b><b>vget_low_u16</b></b> (uint16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_u32" type="checkbox"><label for="vget_low_u32"><div>uint32x2_t <b><b>vget_low_u32</b></b> (uint32x4_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_u64" type="checkbox"><label for="vget_low_u64"><div>uint64x1_t <b><b>vget_low_u64</b></b> (uint64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_p64" type="checkbox"><label for="vget_low_p64"><div>poly64x1_t <b><b>vget_low_p64</b></b> (poly64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_f16" type="checkbox"><label for="vget_low_f16"><div>float16x4_t <b><b>vget_low_f16</b></b> (float16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_f32" type="checkbox"><label for="vget_low_f32"><div>float32x2_t <b><b>vget_low_f32</b></b> (float32x4_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_p8" type="checkbox"><label for="vget_low_p8"><div>poly8x8_t <b><b>vget_low_p8</b></b> (poly8x16_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_p16" type="checkbox"><label for="vget_low_p16"><div>poly16x4_t <b><b>vget_low_p16</b></b> (poly16x8_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_low_f64" type="checkbox"><label for="vget_low_f64"><div>float64x1_t <b><b>vget_low_f64</b></b> (float64x2_t a)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Vd.1D,Vn.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupb_lane_s8" type="checkbox"><label for="vdupb_lane_s8"><div>int8_t <b><b>vdupb_lane_s8</b></b> (int8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Bd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vduph_lane_s16" type="checkbox"><label for="vduph_lane_s16"><div>int16_t <b><b>vduph_lane_s16</b></b> (int16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdups_lane_s32" type="checkbox"><label for="vdups_lane_s32"><div>int32_t <b><b>vdups_lane_s32</b></b> (int32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupd_lane_s64" type="checkbox"><label for="vdupd_lane_s64"><div>int64_t <b><b>vdupd_lane_s64</b></b> (int64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupb_lane_u8" type="checkbox"><label for="vdupb_lane_u8"><div>uint8_t <b><b>vdupb_lane_u8</b></b> (uint8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Bd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vduph_lane_u16" type="checkbox"><label for="vduph_lane_u16"><div>uint16_t <b><b>vduph_lane_u16</b></b> (uint16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdups_lane_u32" type="checkbox"><label for="vdups_lane_u32"><div>uint32_t <b><b>vdups_lane_u32</b></b> (uint32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupd_lane_u64" type="checkbox"><label for="vdupd_lane_u64"><div>uint64_t <b><b>vdupd_lane_u64</b></b> (uint64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdups_lane_f32" type="checkbox"><label for="vdups_lane_f32"><div>float32_t <b><b>vdups_lane_f32</b></b> (float32x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupd_lane_f64" type="checkbox"><label for="vdupd_lane_f64"><div>float64_t <b><b>vdupd_lane_f64</b></b> (float64x1_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupb_lane_p8" type="checkbox"><label for="vdupb_lane_p8"><div>poly8_t <b><b>vdupb_lane_p8</b></b> (poly8x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Bd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vduph_lane_p16" type="checkbox"><label for="vduph_lane_p16"><div>poly16_t <b><b>vduph_lane_p16</b></b> (poly16x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupb_laneq_s8" type="checkbox"><label for="vdupb_laneq_s8"><div>int8_t <b><b>vdupb_laneq_s8</b></b> (int8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Bd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vduph_laneq_s16" type="checkbox"><label for="vduph_laneq_s16"><div>int16_t <b><b>vduph_laneq_s16</b></b> (int16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdups_laneq_s32" type="checkbox"><label for="vdups_laneq_s32"><div>int32_t <b><b>vdups_laneq_s32</b></b> (int32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupd_laneq_s64" type="checkbox"><label for="vdupd_laneq_s64"><div>int64_t <b><b>vdupd_laneq_s64</b></b> (int64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupb_laneq_u8" type="checkbox"><label for="vdupb_laneq_u8"><div>uint8_t <b><b>vdupb_laneq_u8</b></b> (uint8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Bd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vduph_laneq_u16" type="checkbox"><label for="vduph_laneq_u16"><div>uint16_t <b><b>vduph_laneq_u16</b></b> (uint16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdups_laneq_u32" type="checkbox"><label for="vdups_laneq_u32"><div>uint32_t <b><b>vdups_laneq_u32</b></b> (uint32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupd_laneq_u64" type="checkbox"><label for="vdupd_laneq_u64"><div>uint64_t <b><b>vdupd_laneq_u64</b></b> (uint64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdups_laneq_f32" type="checkbox"><label for="vdups_laneq_f32"><div>float32_t <b><b>vdups_laneq_f32</b></b> (float32x4_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupd_laneq_f64" type="checkbox"><label for="vdupd_laneq_f64"><div>float64_t <b><b>vdupd_laneq_f64</b></b> (float64x2_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vdupb_laneq_p8" type="checkbox"><label for="vdupb_laneq_p8"><div>poly8_t <b><b>vdupb_laneq_p8</b></b> (poly8x16_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Bd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vduph_laneq_p16" type="checkbox"><label for="vduph_laneq_p16"><div>poly16_t <b><b>vduph_laneq_p16</b></b> (poly16x8_t vec, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s8" type="checkbox"><label for="vld1_s8"><div>int8x8_t <b><b>vld1_s8</b></b> (int8_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s8" type="checkbox"><label for="vld1q_s8"><div>int8x16_t <b><b>vld1q_s8</b></b> (int8_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s16" type="checkbox"><label for="vld1_s16"><div>int16x4_t <b><b>vld1_s16</b></b> (int16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s16" type="checkbox"><label for="vld1q_s16"><div>int16x8_t <b><b>vld1q_s16</b></b> (int16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s32" type="checkbox"><label for="vld1_s32"><div>int32x2_t <b><b>vld1_s32</b></b> (int32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s32" type="checkbox"><label for="vld1q_s32"><div>int32x4_t <b><b>vld1q_s32</b></b> (int32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s64" type="checkbox"><label for="vld1_s64"><div>int64x1_t <b><b>vld1_s64</b></b> (int64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s64" type="checkbox"><label for="vld1q_s64"><div>int64x2_t <b><b>vld1q_s64</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u8" type="checkbox"><label for="vld1_u8"><div>uint8x8_t <b><b>vld1_u8</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u8" type="checkbox"><label for="vld1q_u8"><div>uint8x16_t <b><b>vld1q_u8</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u16" type="checkbox"><label for="vld1_u16"><div>uint16x4_t <b><b>vld1_u16</b></b> (uint16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u16" type="checkbox"><label for="vld1q_u16"><div>uint16x8_t <b><b>vld1q_u16</b></b> (uint16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u32" type="checkbox"><label for="vld1_u32"><div>uint32x2_t <b><b>vld1_u32</b></b> (uint32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u32" type="checkbox"><label for="vld1q_u32"><div>uint32x4_t <b><b>vld1q_u32</b></b> (uint32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u64" type="checkbox"><label for="vld1_u64"><div>uint64x1_t <b><b>vld1_u64</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u64" type="checkbox"><label for="vld1q_u64"><div>uint64x2_t <b><b>vld1q_u64</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p64" type="checkbox"><label for="vld1_p64"><div>poly64x1_t <b><b>vld1_p64</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p64" type="checkbox"><label for="vld1q_p64"><div>poly64x2_t <b><b>vld1q_p64</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f16" type="checkbox"><label for="vld1_f16"><div>float16x4_t <b><b>vld1_f16</b></b> (float16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f16" type="checkbox"><label for="vld1q_f16"><div>float16x8_t <b><b>vld1q_f16</b></b> (float16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f32" type="checkbox"><label for="vld1_f32"><div>float32x2_t <b><b>vld1_f32</b></b> (float32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f32" type="checkbox"><label for="vld1q_f32"><div>float32x4_t <b><b>vld1q_f32</b></b> (float32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p8" type="checkbox"><label for="vld1_p8"><div>poly8x8_t <b><b>vld1_p8</b></b> (poly8_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p8" type="checkbox"><label for="vld1q_p8"><div>poly8x16_t <b><b>vld1q_p8</b></b> (poly8_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p16" type="checkbox"><label for="vld1_p16"><div>poly16x4_t <b><b>vld1_p16</b></b> (poly16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p16" type="checkbox"><label for="vld1q_p16"><div>poly16x8_t <b><b>vld1q_p16</b></b> (poly16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f64" type="checkbox"><label for="vld1_f64"><div>float64x1_t <b><b>vld1_f64</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f64" type="checkbox"><label for="vld1q_f64"><div>float64x2_t <b><b>vld1q_f64</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_s8" type="checkbox"><label for="vld1_lane_s8"><div>int8x8_t <b><b>vld1_lane_s8</b></b> (int8_t const * ptr, int8x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_s8" type="checkbox"><label for="vld1q_lane_s8"><div>int8x16_t <b><b>vld1q_lane_s8</b></b> (int8_t const * ptr, int8x16_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_s16" type="checkbox"><label for="vld1_lane_s16"><div>int16x4_t <b><b>vld1_lane_s16</b></b> (int16_t const * ptr, int16x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_s16" type="checkbox"><label for="vld1q_lane_s16"><div>int16x8_t <b><b>vld1q_lane_s16</b></b> (int16_t const * ptr, int16x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_s32" type="checkbox"><label for="vld1_lane_s32"><div>int32x2_t <b><b>vld1_lane_s32</b></b> (int32_t const * ptr, int32x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.S}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_s32" type="checkbox"><label for="vld1q_lane_s32"><div>int32x4_t <b><b>vld1q_lane_s32</b></b> (int32_t const * ptr, int32x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.S}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_s64" type="checkbox"><label for="vld1_lane_s64"><div>int64x1_t <b><b>vld1_lane_s64</b></b> (int64_t const * ptr, int64x1_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_s64" type="checkbox"><label for="vld1q_lane_s64"><div>int64x2_t <b><b>vld1q_lane_s64</b></b> (int64_t const * ptr, int64x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_u8" type="checkbox"><label for="vld1_lane_u8"><div>uint8x8_t <b><b>vld1_lane_u8</b></b> (uint8_t const * ptr, uint8x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.B}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_u8" type="checkbox"><label for="vld1q_lane_u8"><div>uint8x16_t <b><b>vld1q_lane_u8</b></b> (uint8_t const * ptr, uint8x16_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.B}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_u16" type="checkbox"><label for="vld1_lane_u16"><div>uint16x4_t <b><b>vld1_lane_u16</b></b> (uint16_t const * ptr, uint16x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_u16" type="checkbox"><label for="vld1q_lane_u16"><div>uint16x8_t <b><b>vld1q_lane_u16</b></b> (uint16_t const * ptr, uint16x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_u32" type="checkbox"><label for="vld1_lane_u32"><div>uint32x2_t <b><b>vld1_lane_u32</b></b> (uint32_t const * ptr, uint32x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.S}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_u32" type="checkbox"><label for="vld1q_lane_u32"><div>uint32x4_t <b><b>vld1q_lane_u32</b></b> (uint32_t const * ptr, uint32x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.S}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_u64" type="checkbox"><label for="vld1_lane_u64"><div>uint64x1_t <b><b>vld1_lane_u64</b></b> (uint64_t const * ptr, uint64x1_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_u64" type="checkbox"><label for="vld1q_lane_u64"><div>uint64x2_t <b><b>vld1q_lane_u64</b></b> (uint64_t const * ptr, uint64x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_p64" type="checkbox"><label for="vld1_lane_p64"><div>poly64x1_t <b><b>vld1_lane_p64</b></b> (poly64_t const * ptr, poly64x1_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_p64" type="checkbox"><label for="vld1q_lane_p64"><div>poly64x2_t <b><b>vld1q_lane_p64</b></b> (poly64_t const * ptr, poly64x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_f16" type="checkbox"><label for="vld1_lane_f16"><div>float16x4_t <b><b>vld1_lane_f16</b></b> (float16_t const * ptr, float16x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_f16" type="checkbox"><label for="vld1q_lane_f16"><div>float16x8_t <b><b>vld1q_lane_f16</b></b> (float16_t const * ptr, float16x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_f32" type="checkbox"><label for="vld1_lane_f32"><div>float32x2_t <b><b>vld1_lane_f32</b></b> (float32_t const * ptr, float32x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.S}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_f32" type="checkbox"><label for="vld1q_lane_f32"><div>float32x4_t <b><b>vld1q_lane_f32</b></b> (float32_t const * ptr, float32x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.S}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_p8" type="checkbox"><label for="vld1_lane_p8"><div>poly8x8_t <b><b>vld1_lane_p8</b></b> (poly8_t const * ptr, poly8x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.B}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_p8" type="checkbox"><label for="vld1q_lane_p8"><div>poly8x16_t <b><b>vld1q_lane_p8</b></b> (poly8_t const * ptr, poly8x16_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.B}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_p16" type="checkbox"><label for="vld1_lane_p16"><div>poly16x4_t <b><b>vld1_lane_p16</b></b> (poly16_t const * ptr, poly16x4_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_p16" type="checkbox"><label for="vld1q_lane_p16"><div>poly16x8_t <b><b>vld1q_lane_p16</b></b> (poly16_t const * ptr, poly16x8_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.H}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_lane_f64" type="checkbox"><label for="vld1_lane_f64"><div>float64x1_t <b><b>vld1_lane_f64</b></b> (float64_t const * ptr, float64x1_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_lane_f64" type="checkbox"><label for="vld1q_lane_f64"><div>float64x2_t <b><b>vld1q_lane_f64</b></b> (float64_t const * ptr, float64x2_t src, const int lane)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.D}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_s8" type="checkbox"><label for="vld1_dup_s8"><div>int8x8_t <b><b>vld1_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_s8" type="checkbox"><label for="vld1q_dup_s8"><div>int8x16_t <b><b>vld1q_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_s16" type="checkbox"><label for="vld1_dup_s16"><div>int16x4_t <b><b>vld1_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_s16" type="checkbox"><label for="vld1q_dup_s16"><div>int16x8_t <b><b>vld1q_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_s32" type="checkbox"><label for="vld1_dup_s32"><div>int32x2_t <b><b>vld1_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_s32" type="checkbox"><label for="vld1q_dup_s32"><div>int32x4_t <b><b>vld1q_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_s64" type="checkbox"><label for="vld1_dup_s64"><div>int64x1_t <b><b>vld1_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_s64" type="checkbox"><label for="vld1q_dup_s64"><div>int64x2_t <b><b>vld1q_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_u8" type="checkbox"><label for="vld1_dup_u8"><div>uint8x8_t <b><b>vld1_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_u8" type="checkbox"><label for="vld1q_dup_u8"><div>uint8x16_t <b><b>vld1q_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_u16" type="checkbox"><label for="vld1_dup_u16"><div>uint16x4_t <b><b>vld1_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_u16" type="checkbox"><label for="vld1q_dup_u16"><div>uint16x8_t <b><b>vld1q_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_u32" type="checkbox"><label for="vld1_dup_u32"><div>uint32x2_t <b><b>vld1_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_u32" type="checkbox"><label for="vld1q_dup_u32"><div>uint32x4_t <b><b>vld1q_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_u64" type="checkbox"><label for="vld1_dup_u64"><div>uint64x1_t <b><b>vld1_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_u64" type="checkbox"><label for="vld1q_dup_u64"><div>uint64x2_t <b><b>vld1q_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_p64" type="checkbox"><label for="vld1_dup_p64"><div>poly64x1_t <b><b>vld1_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_p64" type="checkbox"><label for="vld1q_dup_p64"><div>poly64x2_t <b><b>vld1q_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_f16" type="checkbox"><label for="vld1_dup_f16"><div>float16x4_t <b><b>vld1_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_f16" type="checkbox"><label for="vld1q_dup_f16"><div>float16x8_t <b><b>vld1q_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_f32" type="checkbox"><label for="vld1_dup_f32"><div>float32x2_t <b><b>vld1_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_f32" type="checkbox"><label for="vld1q_dup_f32"><div>float32x4_t <b><b>vld1q_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_p8" type="checkbox"><label for="vld1_dup_p8"><div>poly8x8_t <b><b>vld1_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_p8" type="checkbox"><label for="vld1q_dup_p8"><div>poly8x16_t <b><b>vld1q_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_p16" type="checkbox"><label for="vld1_dup_p16"><div>poly16x4_t <b><b>vld1_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_p16" type="checkbox"><label for="vld1q_dup_p16"><div>poly16x8_t <b><b>vld1q_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_dup_f64" type="checkbox"><label for="vld1_dup_f64"><div>float64x1_t <b><b>vld1_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_dup_f64" type="checkbox"><label for="vld1q_dup_f64"><div>float64x2_t <b><b>vld1q_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure and replicate to all lanes (of one register)</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1r-load-one-single-element-structure-and-replicate-to-all-lanes-of-one-register">LD1R</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s8" type="checkbox"><label for="vst1_s8"><div>void <b><b>vst1_s8</b></b> (int8_t * ptr, int8x8_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s8" type="checkbox"><label for="vst1q_s8"><div>void <b><b>vst1q_s8</b></b> (int8_t * ptr, int8x16_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s16" type="checkbox"><label for="vst1_s16"><div>void <b><b>vst1_s16</b></b> (int16_t * ptr, int16x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s16" type="checkbox"><label for="vst1q_s16"><div>void <b><b>vst1q_s16</b></b> (int16_t * ptr, int16x8_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s32" type="checkbox"><label for="vst1_s32"><div>void <b><b>vst1_s32</b></b> (int32_t * ptr, int32x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s32" type="checkbox"><label for="vst1q_s32"><div>void <b><b>vst1q_s32</b></b> (int32_t * ptr, int32x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s64" type="checkbox"><label for="vst1_s64"><div>void <b><b>vst1_s64</b></b> (int64_t * ptr, int64x1_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s64" type="checkbox"><label for="vst1q_s64"><div>void <b><b>vst1q_s64</b></b> (int64_t * ptr, int64x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u8" type="checkbox"><label for="vst1_u8"><div>void <b><b>vst1_u8</b></b> (uint8_t * ptr, uint8x8_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u8" type="checkbox"><label for="vst1q_u8"><div>void <b><b>vst1q_u8</b></b> (uint8_t * ptr, uint8x16_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u16" type="checkbox"><label for="vst1_u16"><div>void <b><b>vst1_u16</b></b> (uint16_t * ptr, uint16x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u16" type="checkbox"><label for="vst1q_u16"><div>void <b><b>vst1q_u16</b></b> (uint16_t * ptr, uint16x8_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u32" type="checkbox"><label for="vst1_u32"><div>void <b><b>vst1_u32</b></b> (uint32_t * ptr, uint32x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u32" type="checkbox"><label for="vst1q_u32"><div>void <b><b>vst1q_u32</b></b> (uint32_t * ptr, uint32x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u64" type="checkbox"><label for="vst1_u64"><div>void <b><b>vst1_u64</b></b> (uint64_t * ptr, uint64x1_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u64" type="checkbox"><label for="vst1q_u64"><div>void <b><b>vst1q_u64</b></b> (uint64_t * ptr, uint64x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p64" type="checkbox"><label for="vst1_p64"><div>void <b><b>vst1_p64</b></b> (poly64_t * ptr, poly64x1_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p64" type="checkbox"><label for="vst1q_p64"><div>void <b><b>vst1q_p64</b></b> (poly64_t * ptr, poly64x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f16" type="checkbox"><label for="vst1_f16"><div>void <b><b>vst1_f16</b></b> (float16_t * ptr, float16x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f16" type="checkbox"><label for="vst1q_f16"><div>void <b><b>vst1q_f16</b></b> (float16_t * ptr, float16x8_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f32" type="checkbox"><label for="vst1_f32"><div>void <b><b>vst1_f32</b></b> (float32_t * ptr, float32x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f32" type="checkbox"><label for="vst1q_f32"><div>void <b><b>vst1q_f32</b></b> (float32_t * ptr, float32x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p8" type="checkbox"><label for="vst1_p8"><div>void <b><b>vst1_p8</b></b> (poly8_t * ptr, poly8x8_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p8" type="checkbox"><label for="vst1q_p8"><div>void <b><b>vst1q_p8</b></b> (poly8_t * ptr, poly8x16_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p16" type="checkbox"><label for="vst1_p16"><div>void <b><b>vst1_p16</b></b> (poly16_t * ptr, poly16x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p16" type="checkbox"><label for="vst1q_p16"><div>void <b><b>vst1q_p16</b></b> (poly16_t * ptr, poly16x8_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f64" type="checkbox"><label for="vst1_f64"><div>void <b><b>vst1_f64</b></b> (float64_t * ptr, float64x1_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f64" type="checkbox"><label for="vst1q_f64"><div>void <b><b>vst1q_f64</b></b> (float64_t * ptr, float64x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_s8" type="checkbox"><label for="vst1_lane_s8"><div>void <b><b>vst1_lane_s8</b></b> (int8_t * ptr, int8x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_s8" type="checkbox"><label for="vst1q_lane_s8"><div>void <b><b>vst1q_lane_s8</b></b> (int8_t * ptr, int8x16_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_s16" type="checkbox"><label for="vst1_lane_s16"><div>void <b><b>vst1_lane_s16</b></b> (int16_t * ptr, int16x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_s16" type="checkbox"><label for="vst1q_lane_s16"><div>void <b><b>vst1q_lane_s16</b></b> (int16_t * ptr, int16x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_s32" type="checkbox"><label for="vst1_lane_s32"><div>void <b><b>vst1_lane_s32</b></b> (int32_t * ptr, int32x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_s32" type="checkbox"><label for="vst1q_lane_s32"><div>void <b><b>vst1q_lane_s32</b></b> (int32_t * ptr, int32x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_s64" type="checkbox"><label for="vst1_lane_s64"><div>void <b><b>vst1_lane_s64</b></b> (int64_t * ptr, int64x1_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_s64" type="checkbox"><label for="vst1q_lane_s64"><div>void <b><b>vst1q_lane_s64</b></b> (int64_t * ptr, int64x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_u8" type="checkbox"><label for="vst1_lane_u8"><div>void <b><b>vst1_lane_u8</b></b> (uint8_t * ptr, uint8x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_u8" type="checkbox"><label for="vst1q_lane_u8"><div>void <b><b>vst1q_lane_u8</b></b> (uint8_t * ptr, uint8x16_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_u16" type="checkbox"><label for="vst1_lane_u16"><div>void <b><b>vst1_lane_u16</b></b> (uint16_t * ptr, uint16x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_u16" type="checkbox"><label for="vst1q_lane_u16"><div>void <b><b>vst1q_lane_u16</b></b> (uint16_t * ptr, uint16x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_u32" type="checkbox"><label for="vst1_lane_u32"><div>void <b><b>vst1_lane_u32</b></b> (uint32_t * ptr, uint32x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_u32" type="checkbox"><label for="vst1q_lane_u32"><div>void <b><b>vst1q_lane_u32</b></b> (uint32_t * ptr, uint32x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_u64" type="checkbox"><label for="vst1_lane_u64"><div>void <b><b>vst1_lane_u64</b></b> (uint64_t * ptr, uint64x1_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_u64" type="checkbox"><label for="vst1q_lane_u64"><div>void <b><b>vst1q_lane_u64</b></b> (uint64_t * ptr, uint64x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_p64" type="checkbox"><label for="vst1_lane_p64"><div>void <b><b>vst1_lane_p64</b></b> (poly64_t * ptr, poly64x1_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_p64" type="checkbox"><label for="vst1q_lane_p64"><div>void <b><b>vst1q_lane_p64</b></b> (poly64_t * ptr, poly64x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_f16" type="checkbox"><label for="vst1_lane_f16"><div>void <b><b>vst1_lane_f16</b></b> (float16_t * ptr, float16x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_f16" type="checkbox"><label for="vst1q_lane_f16"><div>void <b><b>vst1q_lane_f16</b></b> (float16_t * ptr, float16x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_f32" type="checkbox"><label for="vst1_lane_f32"><div>void <b><b>vst1_lane_f32</b></b> (float32_t * ptr, float32x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_f32" type="checkbox"><label for="vst1q_lane_f32"><div>void <b><b>vst1q_lane_f32</b></b> (float32_t * ptr, float32x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_p8" type="checkbox"><label for="vst1_lane_p8"><div>void <b><b>vst1_lane_p8</b></b> (poly8_t * ptr, poly8x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_p8" type="checkbox"><label for="vst1q_lane_p8"><div>void <b><b>vst1q_lane_p8</b></b> (poly8_t * ptr, poly8x16_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_p16" type="checkbox"><label for="vst1_lane_p16"><div>void <b><b>vst1_lane_p16</b></b> (poly16_t * ptr, poly16x4_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_p16" type="checkbox"><label for="vst1q_lane_p16"><div>void <b><b>vst1q_lane_p16</b></b> (poly16_t * ptr, poly16x8_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_lane_f64" type="checkbox"><label for="vst1_lane_f64"><div>void <b><b>vst1_lane_f64</b></b> (float64_t * ptr, float64x1_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.1D <br />
+0 &le; lane &le; 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_lane_f64" type="checkbox"><label for="vst1q_lane_f64"><div>void <b><b>vst1q_lane_f64</b></b> (float64_t * ptr, float64x2_t val, const int lane)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Vt.2D <br />
+0 &le; lane &le; 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_s8" type="checkbox"><label for="vld2_s8"><div>int8x8x2_t <b><b>vld2_s8</b></b> (int8_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_s8" type="checkbox"><label for="vld2q_s8"><div>int8x16x2_t <b><b>vld2q_s8</b></b> (int8_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_s16" type="checkbox"><label for="vld2_s16"><div>int16x4x2_t <b><b>vld2_s16</b></b> (int16_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_s16" type="checkbox"><label for="vld2q_s16"><div>int16x8x2_t <b><b>vld2q_s16</b></b> (int16_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_s32" type="checkbox"><label for="vld2_s32"><div>int32x2x2_t <b><b>vld2_s32</b></b> (int32_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_s32" type="checkbox"><label for="vld2q_s32"><div>int32x4x2_t <b><b>vld2q_s32</b></b> (int32_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_u8" type="checkbox"><label for="vld2_u8"><div>uint8x8x2_t <b><b>vld2_u8</b></b> (uint8_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_u8" type="checkbox"><label for="vld2q_u8"><div>uint8x16x2_t <b><b>vld2q_u8</b></b> (uint8_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_u16" type="checkbox"><label for="vld2_u16"><div>uint16x4x2_t <b><b>vld2_u16</b></b> (uint16_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_u16" type="checkbox"><label for="vld2q_u16"><div>uint16x8x2_t <b><b>vld2q_u16</b></b> (uint16_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_u32" type="checkbox"><label for="vld2_u32"><div>uint32x2x2_t <b><b>vld2_u32</b></b> (uint32_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_u32" type="checkbox"><label for="vld2q_u32"><div>uint32x4x2_t <b><b>vld2q_u32</b></b> (uint32_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_f16" type="checkbox"><label for="vld2_f16"><div>float16x4x2_t <b><b>vld2_f16</b></b> (float16_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-multiple-structures-load-multiple-2-element-structures-to-two-registers">LD2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_f16" type="checkbox"><label for="vld2q_f16"><div>float16x8x2_t <b><b>vld2q_f16</b></b> (float16_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-multiple-structures-load-multiple-2-element-structures-to-two-registers">LD2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_f32" type="checkbox"><label for="vld2_f32"><div>float32x2x2_t <b><b>vld2_f32</b></b> (float32_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-multiple-structures-load-multiple-2-element-structures-to-two-registers">LD2</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_f32" type="checkbox"><label for="vld2q_f32"><div>float32x4x2_t <b><b>vld2q_f32</b></b> (float32_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-multiple-structures-load-multiple-2-element-structures-to-two-registers">LD2</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_p8" type="checkbox"><label for="vld2_p8"><div>poly8x8x2_t <b><b>vld2_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_p8" type="checkbox"><label for="vld2q_p8"><div>poly8x16x2_t <b><b>vld2q_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_p16" type="checkbox"><label for="vld2_p16"><div>poly16x4x2_t <b><b>vld2_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_p16" type="checkbox"><label for="vld2q_p16"><div>poly16x8x2_t <b><b>vld2q_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_s64" type="checkbox"><label for="vld2_s64"><div>int64x1x2_t <b><b>vld2_s64</b></b> (int64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_u64" type="checkbox"><label for="vld2_u64"><div>uint64x1x2_t <b><b>vld2_u64</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_p64" type="checkbox"><label for="vld2_p64"><div>poly64x1x2_t <b><b>vld2_p64</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_s64" type="checkbox"><label for="vld2q_s64"><div>int64x2x2_t <b><b>vld2q_s64</b></b> (int64_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_u64" type="checkbox"><label for="vld2q_u64"><div>uint64x2x2_t <b><b>vld2q_u64</b></b> (uint64_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_p64" type="checkbox"><label for="vld2q_p64"><div>poly64x2x2_t <b><b>vld2q_p64</b></b> (poly64_t const * ptr)<span class="right">Load multiple 2-element structures to two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_f64" type="checkbox"><label for="vld2_f64"><div>float64x1x2_t <b><b>vld2_f64</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_f64" type="checkbox"><label for="vld2q_f64"><div>float64x2x2_t <b><b>vld2q_f64</b></b> (float64_t const * ptr)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_s8" type="checkbox"><label for="vld3_s8"><div>int8x8x3_t <b><b>vld3_s8</b></b> (int8_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_s8" type="checkbox"><label for="vld3q_s8"><div>int8x16x3_t <b><b>vld3q_s8</b></b> (int8_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_s16" type="checkbox"><label for="vld3_s16"><div>int16x4x3_t <b><b>vld3_s16</b></b> (int16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_s16" type="checkbox"><label for="vld3q_s16"><div>int16x8x3_t <b><b>vld3q_s16</b></b> (int16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_s32" type="checkbox"><label for="vld3_s32"><div>int32x2x3_t <b><b>vld3_s32</b></b> (int32_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_s32" type="checkbox"><label for="vld3q_s32"><div>int32x4x3_t <b><b>vld3q_s32</b></b> (int32_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_u8" type="checkbox"><label for="vld3_u8"><div>uint8x8x3_t <b><b>vld3_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_u8" type="checkbox"><label for="vld3q_u8"><div>uint8x16x3_t <b><b>vld3q_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_u16" type="checkbox"><label for="vld3_u16"><div>uint16x4x3_t <b><b>vld3_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_u16" type="checkbox"><label for="vld3q_u16"><div>uint16x8x3_t <b><b>vld3q_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_u32" type="checkbox"><label for="vld3_u32"><div>uint32x2x3_t <b><b>vld3_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_u32" type="checkbox"><label for="vld3q_u32"><div>uint32x4x3_t <b><b>vld3q_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_f16" type="checkbox"><label for="vld3_f16"><div>float16x4x3_t <b><b>vld3_f16</b></b> (float16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_f16" type="checkbox"><label for="vld3q_f16"><div>float16x8x3_t <b><b>vld3q_f16</b></b> (float16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_f32" type="checkbox"><label for="vld3_f32"><div>float32x2x3_t <b><b>vld3_f32</b></b> (float32_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_f32" type="checkbox"><label for="vld3q_f32"><div>float32x4x3_t <b><b>vld3q_f32</b></b> (float32_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_p8" type="checkbox"><label for="vld3_p8"><div>poly8x8x3_t <b><b>vld3_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_p8" type="checkbox"><label for="vld3q_p8"><div>poly8x16x3_t <b><b>vld3q_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_p16" type="checkbox"><label for="vld3_p16"><div>poly16x4x3_t <b><b>vld3_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_p16" type="checkbox"><label for="vld3q_p16"><div>poly16x8x3_t <b><b>vld3q_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_s64" type="checkbox"><label for="vld3_s64"><div>int64x1x3_t <b><b>vld3_s64</b></b> (int64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_u64" type="checkbox"><label for="vld3_u64"><div>uint64x1x3_t <b><b>vld3_u64</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_p64" type="checkbox"><label for="vld3_p64"><div>poly64x1x3_t <b><b>vld3_p64</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_s64" type="checkbox"><label for="vld3q_s64"><div>int64x2x3_t <b><b>vld3q_s64</b></b> (int64_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_u64" type="checkbox"><label for="vld3q_u64"><div>uint64x2x3_t <b><b>vld3q_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_p64" type="checkbox"><label for="vld3q_p64"><div>poly64x2x3_t <b><b>vld3q_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_f64" type="checkbox"><label for="vld3_f64"><div>float64x1x3_t <b><b>vld3_f64</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_f64" type="checkbox"><label for="vld3q_f64"><div>float64x2x3_t <b><b>vld3q_f64</b></b> (float64_t const * ptr)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_s8" type="checkbox"><label for="vld4_s8"><div>int8x8x4_t <b><b>vld4_s8</b></b> (int8_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_s8" type="checkbox"><label for="vld4q_s8"><div>int8x16x4_t <b><b>vld4q_s8</b></b> (int8_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_s16" type="checkbox"><label for="vld4_s16"><div>int16x4x4_t <b><b>vld4_s16</b></b> (int16_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_s16" type="checkbox"><label for="vld4q_s16"><div>int16x8x4_t <b><b>vld4q_s16</b></b> (int16_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_s32" type="checkbox"><label for="vld4_s32"><div>int32x2x4_t <b><b>vld4_s32</b></b> (int32_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_s32" type="checkbox"><label for="vld4q_s32"><div>int32x4x4_t <b><b>vld4q_s32</b></b> (int32_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_u8" type="checkbox"><label for="vld4_u8"><div>uint8x8x4_t <b><b>vld4_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_u8" type="checkbox"><label for="vld4q_u8"><div>uint8x16x4_t <b><b>vld4q_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_u16" type="checkbox"><label for="vld4_u16"><div>uint16x4x4_t <b><b>vld4_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_u16" type="checkbox"><label for="vld4q_u16"><div>uint16x8x4_t <b><b>vld4q_u16</b></b> (uint16_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_u32" type="checkbox"><label for="vld4_u32"><div>uint32x2x4_t <b><b>vld4_u32</b></b> (uint32_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_u32" type="checkbox"><label for="vld4q_u32"><div>uint32x4x4_t <b><b>vld4q_u32</b></b> (uint32_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_f16" type="checkbox"><label for="vld4_f16"><div>float16x4x4_t <b><b>vld4_f16</b></b> (float16_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_f16" type="checkbox"><label for="vld4q_f16"><div>float16x8x4_t <b><b>vld4q_f16</b></b> (float16_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_f32" type="checkbox"><label for="vld4_f32"><div>float32x2x4_t <b><b>vld4_f32</b></b> (float32_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_f32" type="checkbox"><label for="vld4q_f32"><div>float32x4x4_t <b><b>vld4q_f32</b></b> (float32_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_p8" type="checkbox"><label for="vld4_p8"><div>poly8x8x4_t <b><b>vld4_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_p8" type="checkbox"><label for="vld4q_p8"><div>poly8x16x4_t <b><b>vld4q_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_p16" type="checkbox"><label for="vld4_p16"><div>poly16x4x4_t <b><b>vld4_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_p16" type="checkbox"><label for="vld4q_p16"><div>poly16x8x4_t <b><b>vld4q_p16</b></b> (poly16_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_s64" type="checkbox"><label for="vld4_s64"><div>int64x1x4_t <b><b>vld4_s64</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_u64" type="checkbox"><label for="vld4_u64"><div>uint64x1x4_t <b><b>vld4_u64</b></b> (uint64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_p64" type="checkbox"><label for="vld4_p64"><div>poly64x1x4_t <b><b>vld4_p64</b></b> (poly64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_s64" type="checkbox"><label for="vld4q_s64"><div>int64x2x4_t <b><b>vld4q_s64</b></b> (int64_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_u64" type="checkbox"><label for="vld4q_u64"><div>uint64x2x4_t <b><b>vld4q_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_p64" type="checkbox"><label for="vld4q_p64"><div>poly64x2x4_t <b><b>vld4q_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_f64" type="checkbox"><label for="vld4_f64"><div>float64x1x4_t <b><b>vld4_f64</b></b> (float64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_f64" type="checkbox"><label for="vld4q_f64"><div>float64x2x4_t <b><b>vld4q_f64</b></b> (float64_t const * ptr)<span class="right">Load multiple 4-element structures to four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&amp;FP registers, with de-interleaving.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_s8" type="checkbox"><label for="vld2_dup_s8"><div>int8x8x2_t <b><b>vld2_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_s8" type="checkbox"><label for="vld2q_dup_s8"><div>int8x16x2_t <b><b>vld2q_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_s16" type="checkbox"><label for="vld2_dup_s16"><div>int16x4x2_t <b><b>vld2_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_s16" type="checkbox"><label for="vld2q_dup_s16"><div>int16x8x2_t <b><b>vld2q_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_s32" type="checkbox"><label for="vld2_dup_s32"><div>int32x2x2_t <b><b>vld2_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_s32" type="checkbox"><label for="vld2q_dup_s32"><div>int32x4x2_t <b><b>vld2q_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_u8" type="checkbox"><label for="vld2_dup_u8"><div>uint8x8x2_t <b><b>vld2_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_u8" type="checkbox"><label for="vld2q_dup_u8"><div>uint8x16x2_t <b><b>vld2q_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_u16" type="checkbox"><label for="vld2_dup_u16"><div>uint16x4x2_t <b><b>vld2_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_u16" type="checkbox"><label for="vld2q_dup_u16"><div>uint16x8x2_t <b><b>vld2q_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_u32" type="checkbox"><label for="vld2_dup_u32"><div>uint32x2x2_t <b><b>vld2_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_u32" type="checkbox"><label for="vld2q_dup_u32"><div>uint32x4x2_t <b><b>vld2q_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_f16" type="checkbox"><label for="vld2_dup_f16"><div>float16x4x2_t <b><b>vld2_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_f16" type="checkbox"><label for="vld2q_dup_f16"><div>float16x8x2_t <b><b>vld2q_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_f32" type="checkbox"><label for="vld2_dup_f32"><div>float32x2x2_t <b><b>vld2_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_f32" type="checkbox"><label for="vld2q_dup_f32"><div>float32x4x2_t <b><b>vld2q_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_p8" type="checkbox"><label for="vld2_dup_p8"><div>poly8x8x2_t <b><b>vld2_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_p8" type="checkbox"><label for="vld2q_dup_p8"><div>poly8x16x2_t <b><b>vld2q_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_p16" type="checkbox"><label for="vld2_dup_p16"><div>poly16x4x2_t <b><b>vld2_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_p16" type="checkbox"><label for="vld2q_dup_p16"><div>poly16x8x2_t <b><b>vld2q_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_s64" type="checkbox"><label for="vld2_dup_s64"><div>int64x1x2_t <b><b>vld2_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_u64" type="checkbox"><label for="vld2_dup_u64"><div>uint64x1x2_t <b><b>vld2_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_p64" type="checkbox"><label for="vld2_dup_p64"><div>poly64x1x2_t <b><b>vld2_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_s64" type="checkbox"><label for="vld2q_dup_s64"><div>int64x2x2_t <b><b>vld2q_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_u64" type="checkbox"><label for="vld2q_dup_u64"><div>uint64x2x2_t <b><b>vld2q_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_p64" type="checkbox"><label for="vld2q_dup_p64"><div>poly64x2x2_t <b><b>vld2q_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_dup_f64" type="checkbox"><label for="vld2_dup_f64"><div>float64x1x2_t <b><b>vld2_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_dup_f64" type="checkbox"><label for="vld2q_dup_f64"><div>float64x2x2_t <b><b>vld2q_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load single 2-element structure and replicate to all lanes of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2r-load-single-2-element-structure-and-replicate-to-all-lanes-of-two-registers">LD2R</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_s8" type="checkbox"><label for="vld3_dup_s8"><div>int8x8x3_t <b><b>vld3_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_s8" type="checkbox"><label for="vld3q_dup_s8"><div>int8x16x3_t <b><b>vld3q_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_s16" type="checkbox"><label for="vld3_dup_s16"><div>int16x4x3_t <b><b>vld3_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_s16" type="checkbox"><label for="vld3q_dup_s16"><div>int16x8x3_t <b><b>vld3q_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_s32" type="checkbox"><label for="vld3_dup_s32"><div>int32x2x3_t <b><b>vld3_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_s32" type="checkbox"><label for="vld3q_dup_s32"><div>int32x4x3_t <b><b>vld3q_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_u8" type="checkbox"><label for="vld3_dup_u8"><div>uint8x8x3_t <b><b>vld3_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_u8" type="checkbox"><label for="vld3q_dup_u8"><div>uint8x16x3_t <b><b>vld3q_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_u16" type="checkbox"><label for="vld3_dup_u16"><div>uint16x4x3_t <b><b>vld3_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_u16" type="checkbox"><label for="vld3q_dup_u16"><div>uint16x8x3_t <b><b>vld3q_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_u32" type="checkbox"><label for="vld3_dup_u32"><div>uint32x2x3_t <b><b>vld3_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_u32" type="checkbox"><label for="vld3q_dup_u32"><div>uint32x4x3_t <b><b>vld3q_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_f16" type="checkbox"><label for="vld3_dup_f16"><div>float16x4x3_t <b><b>vld3_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_f16" type="checkbox"><label for="vld3q_dup_f16"><div>float16x8x3_t <b><b>vld3q_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_f32" type="checkbox"><label for="vld3_dup_f32"><div>float32x2x3_t <b><b>vld3_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_f32" type="checkbox"><label for="vld3q_dup_f32"><div>float32x4x3_t <b><b>vld3q_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_p8" type="checkbox"><label for="vld3_dup_p8"><div>poly8x8x3_t <b><b>vld3_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_p8" type="checkbox"><label for="vld3q_dup_p8"><div>poly8x16x3_t <b><b>vld3q_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_p16" type="checkbox"><label for="vld3_dup_p16"><div>poly16x4x3_t <b><b>vld3_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_p16" type="checkbox"><label for="vld3q_dup_p16"><div>poly16x8x3_t <b><b>vld3q_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_s64" type="checkbox"><label for="vld3_dup_s64"><div>int64x1x3_t <b><b>vld3_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_u64" type="checkbox"><label for="vld3_dup_u64"><div>uint64x1x3_t <b><b>vld3_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_p64" type="checkbox"><label for="vld3_dup_p64"><div>poly64x1x3_t <b><b>vld3_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_s64" type="checkbox"><label for="vld3q_dup_s64"><div>int64x2x3_t <b><b>vld3q_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_u64" type="checkbox"><label for="vld3q_dup_u64"><div>uint64x2x3_t <b><b>vld3q_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_p64" type="checkbox"><label for="vld3q_dup_p64"><div>poly64x2x3_t <b><b>vld3q_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_dup_f64" type="checkbox"><label for="vld3_dup_f64"><div>float64x1x3_t <b><b>vld3_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_dup_f64" type="checkbox"><label for="vld3q_dup_f64"><div>float64x2x3_t <b><b>vld3q_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load single 3-element structure and replicate to all lanes of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3r-load-single-3-element-structure-and-replicate-to-all-lanes-of-three-registers">LD3R</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_s8" type="checkbox"><label for="vld4_dup_s8"><div>int8x8x4_t <b><b>vld4_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_s8" type="checkbox"><label for="vld4q_dup_s8"><div>int8x16x4_t <b><b>vld4q_dup_s8</b></b> (int8_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_s16" type="checkbox"><label for="vld4_dup_s16"><div>int16x4x4_t <b><b>vld4_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_s16" type="checkbox"><label for="vld4q_dup_s16"><div>int16x8x4_t <b><b>vld4q_dup_s16</b></b> (int16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_s32" type="checkbox"><label for="vld4_dup_s32"><div>int32x2x4_t <b><b>vld4_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_s32" type="checkbox"><label for="vld4q_dup_s32"><div>int32x4x4_t <b><b>vld4q_dup_s32</b></b> (int32_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_u8" type="checkbox"><label for="vld4_dup_u8"><div>uint8x8x4_t <b><b>vld4_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_u8" type="checkbox"><label for="vld4q_dup_u8"><div>uint8x16x4_t <b><b>vld4q_dup_u8</b></b> (uint8_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_u16" type="checkbox"><label for="vld4_dup_u16"><div>uint16x4x4_t <b><b>vld4_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_u16" type="checkbox"><label for="vld4q_dup_u16"><div>uint16x8x4_t <b><b>vld4q_dup_u16</b></b> (uint16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_u32" type="checkbox"><label for="vld4_dup_u32"><div>uint32x2x4_t <b><b>vld4_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_u32" type="checkbox"><label for="vld4q_dup_u32"><div>uint32x4x4_t <b><b>vld4q_dup_u32</b></b> (uint32_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_f16" type="checkbox"><label for="vld4_dup_f16"><div>float16x4x4_t <b><b>vld4_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_f16" type="checkbox"><label for="vld4q_dup_f16"><div>float16x8x4_t <b><b>vld4q_dup_f16</b></b> (float16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_f32" type="checkbox"><label for="vld4_dup_f32"><div>float32x2x4_t <b><b>vld4_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_f32" type="checkbox"><label for="vld4q_dup_f32"><div>float32x4x4_t <b><b>vld4q_dup_f32</b></b> (float32_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_p8" type="checkbox"><label for="vld4_dup_p8"><div>poly8x8x4_t <b><b>vld4_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_p8" type="checkbox"><label for="vld4q_dup_p8"><div>poly8x16x4_t <b><b>vld4q_dup_p8</b></b> (poly8_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_p16" type="checkbox"><label for="vld4_dup_p16"><div>poly16x4x4_t <b><b>vld4_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_p16" type="checkbox"><label for="vld4q_dup_p16"><div>poly16x8x4_t <b><b>vld4q_dup_p16</b></b> (poly16_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_s64" type="checkbox"><label for="vld4_dup_s64"><div>int64x1x4_t <b><b>vld4_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_u64" type="checkbox"><label for="vld4_dup_u64"><div>uint64x1x4_t <b><b>vld4_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_p64" type="checkbox"><label for="vld4_dup_p64"><div>poly64x1x4_t <b><b>vld4_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_s64" type="checkbox"><label for="vld4q_dup_s64"><div>int64x2x4_t <b><b>vld4q_dup_s64</b></b> (int64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_u64" type="checkbox"><label for="vld4q_dup_u64"><div>uint64x2x4_t <b><b>vld4q_dup_u64</b></b> (uint64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_p64" type="checkbox"><label for="vld4q_dup_p64"><div>poly64x2x4_t <b><b>vld4q_dup_p64</b></b> (poly64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_dup_f64" type="checkbox"><label for="vld4_dup_f64"><div>float64x1x4_t <b><b>vld4_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_dup_f64" type="checkbox"><label for="vld4q_dup_f64"><div>float64x2x4_t <b><b>vld4q_dup_f64</b></b> (float64_t const * ptr)<span class="right">Load single 4-element structure and replicate to all lanes of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4r-load-single-4-element-structure-and-replicate-to-all-lanes-of-four-registers">LD4R</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_s8" type="checkbox"><label for="vst2_s8"><div>void <b><b>vst2_s8</b></b> (int8_t * ptr, int8x8x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_s8" type="checkbox"><label for="vst2q_s8"><div>void <b><b>vst2q_s8</b></b> (int8_t * ptr, int8x16x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_s16" type="checkbox"><label for="vst2_s16"><div>void <b><b>vst2_s16</b></b> (int16_t * ptr, int16x4x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_s16" type="checkbox"><label for="vst2q_s16"><div>void <b><b>vst2q_s16</b></b> (int16_t * ptr, int16x8x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_s32" type="checkbox"><label for="vst2_s32"><div>void <b><b>vst2_s32</b></b> (int32_t * ptr, int32x2x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_s32" type="checkbox"><label for="vst2q_s32"><div>void <b><b>vst2q_s32</b></b> (int32_t * ptr, int32x4x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_u8" type="checkbox"><label for="vst2_u8"><div>void <b><b>vst2_u8</b></b> (uint8_t * ptr, uint8x8x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_u8" type="checkbox"><label for="vst2q_u8"><div>void <b><b>vst2q_u8</b></b> (uint8_t * ptr, uint8x16x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_u16" type="checkbox"><label for="vst2_u16"><div>void <b><b>vst2_u16</b></b> (uint16_t * ptr, uint16x4x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_u16" type="checkbox"><label for="vst2q_u16"><div>void <b><b>vst2q_u16</b></b> (uint16_t * ptr, uint16x8x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_u32" type="checkbox"><label for="vst2_u32"><div>void <b><b>vst2_u32</b></b> (uint32_t * ptr, uint32x2x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_u32" type="checkbox"><label for="vst2q_u32"><div>void <b><b>vst2q_u32</b></b> (uint32_t * ptr, uint32x4x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_f16" type="checkbox"><label for="vst2_f16"><div>void <b><b>vst2_f16</b></b> (float16_t * ptr, float16x4x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_f16" type="checkbox"><label for="vst2q_f16"><div>void <b><b>vst2q_f16</b></b> (float16_t * ptr, float16x8x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_f32" type="checkbox"><label for="vst2_f32"><div>void <b><b>vst2_f32</b></b> (float32_t * ptr, float32x2x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_f32" type="checkbox"><label for="vst2q_f32"><div>void <b><b>vst2q_f32</b></b> (float32_t * ptr, float32x4x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_p8" type="checkbox"><label for="vst2_p8"><div>void <b><b>vst2_p8</b></b> (poly8_t * ptr, poly8x8x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_p8" type="checkbox"><label for="vst2q_p8"><div>void <b><b>vst2q_p8</b></b> (poly8_t * ptr, poly8x16x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_p16" type="checkbox"><label for="vst2_p16"><div>void <b><b>vst2_p16</b></b> (poly16_t * ptr, poly16x4x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_p16" type="checkbox"><label for="vst2q_p16"><div>void <b><b>vst2q_p16</b></b> (poly16_t * ptr, poly16x8x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_s64" type="checkbox"><label for="vst2_s64"><div>void <b><b>vst2_s64</b></b> (int64_t * ptr, int64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_u64" type="checkbox"><label for="vst2_u64"><div>void <b><b>vst2_u64</b></b> (uint64_t * ptr, uint64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_p64" type="checkbox"><label for="vst2_p64"><div>void <b><b>vst2_p64</b></b> (poly64_t * ptr, poly64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_s64" type="checkbox"><label for="vst2q_s64"><div>void <b><b>vst2q_s64</b></b> (int64_t * ptr, int64x2x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_u64" type="checkbox"><label for="vst2q_u64"><div>void <b><b>vst2q_u64</b></b> (uint64_t * ptr, uint64x2x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_p64" type="checkbox"><label for="vst2q_p64"><div>void <b><b>vst2q_p64</b></b> (poly64_t * ptr, poly64x2x2_t val)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_f64" type="checkbox"><label for="vst2_f64"><div>void <b><b>vst2_f64</b></b> (float64_t * ptr, float64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_f64" type="checkbox"><label for="vst2q_f64"><div>void <b><b>vst2q_f64</b></b> (float64_t * ptr, float64x2x2_t val)<span class="right">Store multiple 2-element structures from two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3_s8" type="checkbox"><label for="vst3_s8"><div>void <b><b>vst3_s8</b></b> (int8_t * ptr, int8x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_s8" type="checkbox"><label for="vst3q_s8"><div>void <b><b>vst3q_s8</b></b> (int8_t * ptr, int8x16x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_s16" type="checkbox"><label for="vst3_s16"><div>void <b><b>vst3_s16</b></b> (int16_t * ptr, int16x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures from three SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_s16" type="checkbox"><label for="vst3q_s16"><div>void <b><b>vst3q_s16</b></b> (int16_t * ptr, int16x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures from three SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_s32" type="checkbox"><label for="vst3_s32"><div>void <b><b>vst3_s32</b></b> (int32_t * ptr, int32x2x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures from three SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_s32" type="checkbox"><label for="vst3q_s32"><div>void <b><b>vst3q_s32</b></b> (int32_t * ptr, int32x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures from three SIMD&amp;FP registers to memory, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_u8" type="checkbox"><label for="vst3_u8"><div>void <b><b>vst3_u8</b></b> (uint8_t * ptr, uint8x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_u8" type="checkbox"><label for="vst3q_u8"><div>void <b><b>vst3q_u8</b></b> (uint8_t * ptr, uint8x16x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_u16" type="checkbox"><label for="vst3_u16"><div>void <b><b>vst3_u16</b></b> (uint16_t * ptr, uint16x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_u16" type="checkbox"><label for="vst3q_u16"><div>void <b><b>vst3q_u16</b></b> (uint16_t * ptr, uint16x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_u32" type="checkbox"><label for="vst3_u32"><div>void <b><b>vst3_u32</b></b> (uint32_t * ptr, uint32x2x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_u32" type="checkbox"><label for="vst3q_u32"><div>void <b><b>vst3q_u32</b></b> (uint32_t * ptr, uint32x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_f16" type="checkbox"><label for="vst3_f16"><div>void <b><b>vst3_f16</b></b> (float16_t * ptr, float16x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_f16" type="checkbox"><label for="vst3q_f16"><div>void <b><b>vst3q_f16</b></b> (float16_t * ptr, float16x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_f32" type="checkbox"><label for="vst3_f32"><div>void <b><b>vst3_f32</b></b> (float32_t * ptr, float32x2x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_f32" type="checkbox"><label for="vst3q_f32"><div>void <b><b>vst3q_f32</b></b> (float32_t * ptr, float32x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_p8" type="checkbox"><label for="vst3_p8"><div>void <b><b>vst3_p8</b></b> (poly8_t * ptr, poly8x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_p8" type="checkbox"><label for="vst3q_p8"><div>void <b><b>vst3q_p8</b></b> (poly8_t * ptr, poly8x16x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_p16" type="checkbox"><label for="vst3_p16"><div>void <b><b>vst3_p16</b></b> (poly16_t * ptr, poly16x4x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_p16" type="checkbox"><label for="vst3q_p16"><div>void <b><b>vst3q_p16</b></b> (poly16_t * ptr, poly16x8x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_s64" type="checkbox"><label for="vst3_s64"><div>void <b><b>vst3_s64</b></b> (int64_t * ptr, int64x1x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_u64" type="checkbox"><label for="vst3_u64"><div>void <b><b>vst3_u64</b></b> (uint64_t * ptr, uint64x1x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_p64" type="checkbox"><label for="vst3_p64"><div>void <b><b>vst3_p64</b></b> (poly64_t * ptr, poly64x1x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_s64" type="checkbox"><label for="vst3q_s64"><div>void <b><b>vst3q_s64</b></b> (int64_t * ptr, int64x2x3_t val)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_u64" type="checkbox"><label for="vst3q_u64"><div>void <b><b>vst3q_u64</b></b> (uint64_t * ptr, uint64x2x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_p64" type="checkbox"><label for="vst3q_p64"><div>void <b><b>vst3q_p64</b></b> (poly64_t * ptr, poly64x2x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3_f64" type="checkbox"><label for="vst3_f64"><div>void <b><b>vst3_f64</b></b> (float64_t * ptr, float64x1x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_f64" type="checkbox"><label for="vst3q_f64"><div>void <b><b>vst3q_f64</b></b> (float64_t * ptr, float64x2x3_t val)<span class="right">Store multiple 3-element structures from three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_s8" type="checkbox"><label for="vst4_s8"><div>void <b><b>vst4_s8</b></b> (int8_t * ptr, int8x8x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_s8" type="checkbox"><label for="vst4q_s8"><div>void <b><b>vst4q_s8</b></b> (int8_t * ptr, int8x16x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_s16" type="checkbox"><label for="vst4_s16"><div>void <b><b>vst4_s16</b></b> (int16_t * ptr, int16x4x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_s16" type="checkbox"><label for="vst4q_s16"><div>void <b><b>vst4q_s16</b></b> (int16_t * ptr, int16x8x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_s32" type="checkbox"><label for="vst4_s32"><div>void <b><b>vst4_s32</b></b> (int32_t * ptr, int32x2x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_s32" type="checkbox"><label for="vst4q_s32"><div>void <b><b>vst4q_s32</b></b> (int32_t * ptr, int32x4x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_u8" type="checkbox"><label for="vst4_u8"><div>void <b><b>vst4_u8</b></b> (uint8_t * ptr, uint8x8x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_u8" type="checkbox"><label for="vst4q_u8"><div>void <b><b>vst4q_u8</b></b> (uint8_t * ptr, uint8x16x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_u16" type="checkbox"><label for="vst4_u16"><div>void <b><b>vst4_u16</b></b> (uint16_t * ptr, uint16x4x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_u16" type="checkbox"><label for="vst4q_u16"><div>void <b><b>vst4q_u16</b></b> (uint16_t * ptr, uint16x8x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_u32" type="checkbox"><label for="vst4_u32"><div>void <b><b>vst4_u32</b></b> (uint32_t * ptr, uint32x2x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_u32" type="checkbox"><label for="vst4q_u32"><div>void <b><b>vst4q_u32</b></b> (uint32_t * ptr, uint32x4x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_f16" type="checkbox"><label for="vst4_f16"><div>void <b><b>vst4_f16</b></b> (float16_t * ptr, float16x4x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_f16" type="checkbox"><label for="vst4q_f16"><div>void <b><b>vst4q_f16</b></b> (float16_t * ptr, float16x8x4_t val)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_f32" type="checkbox"><label for="vst4_f32"><div>void <b><b>vst4_f32</b></b> (float32_t * ptr, float32x2x4_t val)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_f32" type="checkbox"><label for="vst4q_f32"><div>void <b><b>vst4q_f32</b></b> (float32_t * ptr, float32x4x4_t val)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_p8" type="checkbox"><label for="vst4_p8"><div>void <b><b>vst4_p8</b></b> (poly8_t * ptr, poly8x8x4_t val)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_p8" type="checkbox"><label for="vst4q_p8"><div>void <b><b>vst4q_p8</b></b> (poly8_t * ptr, poly8x16x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_p16" type="checkbox"><label for="vst4_p16"><div>void <b><b>vst4_p16</b></b> (poly16_t * ptr, poly16x4x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_p16" type="checkbox"><label for="vst4q_p16"><div>void <b><b>vst4q_p16</b></b> (poly16_t * ptr, poly16x8x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_s64" type="checkbox"><label for="vst4_s64"><div>void <b><b>vst4_s64</b></b> (int64_t * ptr, int64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_u64" type="checkbox"><label for="vst4_u64"><div>void <b><b>vst4_u64</b></b> (uint64_t * ptr, uint64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_p64" type="checkbox"><label for="vst4_p64"><div>void <b><b>vst4_p64</b></b> (poly64_t * ptr, poly64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_s64" type="checkbox"><label for="vst4q_s64"><div>void <b><b>vst4q_s64</b></b> (int64_t * ptr, int64x2x4_t val)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_u64" type="checkbox"><label for="vst4q_u64"><div>void <b><b>vst4q_u64</b></b> (uint64_t * ptr, uint64x2x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_p64" type="checkbox"><label for="vst4q_p64"><div>void <b><b>vst4q_p64</b></b> (poly64_t * ptr, poly64x2x4_t val)<span class="right">Store multiple 4-element structures from four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&amp;FP registers, with interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_f64" type="checkbox"><label for="vst4_f64"><div>void <b><b>vst4_f64</b></b> (float64_t * ptr, float64x1x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_f64" type="checkbox"><label for="vst4q_f64"><div>void <b><b>vst4q_f64</b></b> (float64_t * ptr, float64x2x4_t val)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_s16" type="checkbox"><label for="vld2_lane_s16"><div>int16x4x2_t <b><b>vld2_lane_s16</b></b> (int16_t const * ptr, int16x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_s16" type="checkbox"><label for="vld2q_lane_s16"><div>int16x8x2_t <b><b>vld2q_lane_s16</b></b> (int16_t const * ptr, int16x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_s32" type="checkbox"><label for="vld2_lane_s32"><div>int32x2x2_t <b><b>vld2_lane_s32</b></b> (int32_t const * ptr, int32x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_s32" type="checkbox"><label for="vld2q_lane_s32"><div>int32x4x2_t <b><b>vld2q_lane_s32</b></b> (int32_t const * ptr, int32x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_u16" type="checkbox"><label for="vld2_lane_u16"><div>uint16x4x2_t <b><b>vld2_lane_u16</b></b> (uint16_t const * ptr, uint16x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_u16" type="checkbox"><label for="vld2q_lane_u16"><div>uint16x8x2_t <b><b>vld2q_lane_u16</b></b> (uint16_t const * ptr, uint16x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_u32" type="checkbox"><label for="vld2_lane_u32"><div>uint32x2x2_t <b><b>vld2_lane_u32</b></b> (uint32_t const * ptr, uint32x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_u32" type="checkbox"><label for="vld2q_lane_u32"><div>uint32x4x2_t <b><b>vld2q_lane_u32</b></b> (uint32_t const * ptr, uint32x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_f16" type="checkbox"><label for="vld2_lane_f16"><div>float16x4x2_t <b><b>vld2_lane_f16</b></b> (float16_t const * ptr, float16x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_f16" type="checkbox"><label for="vld2q_lane_f16"><div>float16x8x2_t <b><b>vld2q_lane_f16</b></b> (float16_t const * ptr, float16x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_f32" type="checkbox"><label for="vld2_lane_f32"><div>float32x2x2_t <b><b>vld2_lane_f32</b></b> (float32_t const * ptr, float32x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_f32" type="checkbox"><label for="vld2q_lane_f32"><div>float32x4x2_t <b><b>vld2q_lane_f32</b></b> (float32_t const * ptr, float32x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_p16" type="checkbox"><label for="vld2_lane_p16"><div>poly16x4x2_t <b><b>vld2_lane_p16</b></b> (poly16_t const * ptr, poly16x4x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_p16" type="checkbox"><label for="vld2q_lane_p16"><div>poly16x8x2_t <b><b>vld2q_lane_p16</b></b> (poly16_t const * ptr, poly16x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_s8" type="checkbox"><label for="vld2_lane_s8"><div>int8x8x2_t <b><b>vld2_lane_s8</b></b> (int8_t const * ptr, int8x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_u8" type="checkbox"><label for="vld2_lane_u8"><div>uint8x8x2_t <b><b>vld2_lane_u8</b></b> (uint8_t const * ptr, uint8x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_p8" type="checkbox"><label for="vld2_lane_p8"><div>poly8x8x2_t <b><b>vld2_lane_p8</b></b> (poly8_t const * ptr, poly8x8x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_s8" type="checkbox"><label for="vld2q_lane_s8"><div>int8x16x2_t <b><b>vld2q_lane_s8</b></b> (int8_t const * ptr, int8x16x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_u8" type="checkbox"><label for="vld2q_lane_u8"><div>uint8x16x2_t <b><b>vld2q_lane_u8</b></b> (uint8_t const * ptr, uint8x16x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_p8" type="checkbox"><label for="vld2q_lane_p8"><div>poly8x16x2_t <b><b>vld2q_lane_p8</b></b> (poly8_t const * ptr, poly8x16x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_s64" type="checkbox"><label for="vld2_lane_s64"><div>int64x1x2_t <b><b>vld2_lane_s64</b></b> (int64_t const * ptr, int64x1x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>ptr &rarr; Xn
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_s64" type="checkbox"><label for="vld2q_lane_s64"><div>int64x2x2_t <b><b>vld2q_lane_s64</b></b> (int64_t const * ptr, int64x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>ptr &rarr; Xn
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_u64" type="checkbox"><label for="vld2_lane_u64"><div>uint64x1x2_t <b><b>vld2_lane_u64</b></b> (uint64_t const * ptr, uint64x1x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_u64" type="checkbox"><label for="vld2q_lane_u64"><div>uint64x2x2_t <b><b>vld2q_lane_u64</b></b> (uint64_t const * ptr, uint64x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_p64" type="checkbox"><label for="vld2_lane_p64"><div>poly64x1x2_t <b><b>vld2_lane_p64</b></b> (poly64_t const * ptr, poly64x1x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_p64" type="checkbox"><label for="vld2q_lane_p64"><div>poly64x2x2_t <b><b>vld2q_lane_p64</b></b> (poly64_t const * ptr, poly64x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2_lane_f64" type="checkbox"><label for="vld2_lane_f64"><div>float64x1x2_t <b><b>vld2_lane_f64</b></b> (float64_t const * ptr, float64x1x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld2q_lane_f64" type="checkbox"><label for="vld2q_lane_f64"><div>float64x2x2_t <b><b>vld2q_lane_f64</b></b> (float64_t const * ptr, float64x2x2_t src, const int lane)<span class="right">Load single 2-element structure to one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld2-single-structure-load-single-2-element-structure-to-one-lane-of-two-registers">LD2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_s16" type="checkbox"><label for="vld3_lane_s16"><div>int16x4x3_t <b><b>vld3_lane_s16</b></b> (int16_t const * ptr, int16x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_s16" type="checkbox"><label for="vld3q_lane_s16"><div>int16x8x3_t <b><b>vld3q_lane_s16</b></b> (int16_t const * ptr, int16x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_s32" type="checkbox"><label for="vld3_lane_s32"><div>int32x2x3_t <b><b>vld3_lane_s32</b></b> (int32_t const * ptr, int32x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2S <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_s32" type="checkbox"><label for="vld3q_lane_s32"><div>int32x4x3_t <b><b>vld3q_lane_s32</b></b> (int32_t const * ptr, int32x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4S <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_u16" type="checkbox"><label for="vld3_lane_u16"><div>uint16x4x3_t <b><b>vld3_lane_u16</b></b> (uint16_t const * ptr, uint16x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_u16" type="checkbox"><label for="vld3q_lane_u16"><div>uint16x8x3_t <b><b>vld3q_lane_u16</b></b> (uint16_t const * ptr, uint16x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_u32" type="checkbox"><label for="vld3_lane_u32"><div>uint32x2x3_t <b><b>vld3_lane_u32</b></b> (uint32_t const * ptr, uint32x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2S <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_u32" type="checkbox"><label for="vld3q_lane_u32"><div>uint32x4x3_t <b><b>vld3q_lane_u32</b></b> (uint32_t const * ptr, uint32x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4S <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_f16" type="checkbox"><label for="vld3_lane_f16"><div>float16x4x3_t <b><b>vld3_lane_f16</b></b> (float16_t const * ptr, float16x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_f16" type="checkbox"><label for="vld3q_lane_f16"><div>float16x8x3_t <b><b>vld3q_lane_f16</b></b> (float16_t const * ptr, float16x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_f32" type="checkbox"><label for="vld3_lane_f32"><div>float32x2x3_t <b><b>vld3_lane_f32</b></b> (float32_t const * ptr, float32x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2S <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_f32" type="checkbox"><label for="vld3q_lane_f32"><div>float32x4x3_t <b><b>vld3q_lane_f32</b></b> (float32_t const * ptr, float32x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4S <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_p16" type="checkbox"><label for="vld3_lane_p16"><div>poly16x4x3_t <b><b>vld3_lane_p16</b></b> (poly16_t const * ptr, poly16x4x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_p16" type="checkbox"><label for="vld3q_lane_p16"><div>poly16x8x3_t <b><b>vld3q_lane_p16</b></b> (poly16_t const * ptr, poly16x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_s8" type="checkbox"><label for="vld3_lane_s8"><div>int8x8x3_t <b><b>vld3_lane_s8</b></b> (int8_t const * ptr, int8x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8B <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_u8" type="checkbox"><label for="vld3_lane_u8"><div>uint8x8x3_t <b><b>vld3_lane_u8</b></b> (uint8_t const * ptr, uint8x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8B <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_p8" type="checkbox"><label for="vld3_lane_p8"><div>poly8x8x3_t <b><b>vld3_lane_p8</b></b> (poly8_t const * ptr, poly8x8x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.8B <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_s8" type="checkbox"><label for="vld3q_lane_s8"><div>int8x16x3_t <b><b>vld3q_lane_s8</b></b> (int8_t const * ptr, int8x16x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.16B <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_u8" type="checkbox"><label for="vld3q_lane_u8"><div>uint8x16x3_t <b><b>vld3q_lane_u8</b></b> (uint8_t const * ptr, uint8x16x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.16B <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_p8" type="checkbox"><label for="vld3q_lane_p8"><div>poly8x16x3_t <b><b>vld3q_lane_p8</b></b> (poly8_t const * ptr, poly8x16x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.16B <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_s64" type="checkbox"><label for="vld3_lane_s64"><div>int64x1x3_t <b><b>vld3_lane_s64</b></b> (int64_t const * ptr, int64x1x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_s64" type="checkbox"><label for="vld3q_lane_s64"><div>int64x2x3_t <b><b>vld3q_lane_s64</b></b> (int64_t const * ptr, int64x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_u64" type="checkbox"><label for="vld3_lane_u64"><div>uint64x1x3_t <b><b>vld3_lane_u64</b></b> (uint64_t const * ptr, uint64x1x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_u64" type="checkbox"><label for="vld3q_lane_u64"><div>uint64x2x3_t <b><b>vld3q_lane_u64</b></b> (uint64_t const * ptr, uint64x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_p64" type="checkbox"><label for="vld3_lane_p64"><div>poly64x1x3_t <b><b>vld3_lane_p64</b></b> (poly64_t const * ptr, poly64x1x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_p64" type="checkbox"><label for="vld3q_lane_p64"><div>poly64x2x3_t <b><b>vld3q_lane_p64</b></b> (poly64_t const * ptr, poly64x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3_lane_f64" type="checkbox"><label for="vld3_lane_f64"><div>float64x1x3_t <b><b>vld3_lane_f64</b></b> (float64_t const * ptr, float64x1x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld3q_lane_f64" type="checkbox"><label for="vld3q_lane_f64"><div>float64x2x3_t <b><b>vld3q_lane_f64</b></b> (float64_t const * ptr, float64x2x3_t src, const int lane)<span class="right">Load single 3-element structure to one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld3-single-structure-load-single-3-element-structure-to-one-lane-of-three-registers">LD3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_s16" type="checkbox"><label for="vld4_lane_s16"><div>int16x4x4_t <b><b>vld4_lane_s16</b></b> (int16_t const * ptr, int16x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4H <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_s16" type="checkbox"><label for="vld4q_lane_s16"><div>int16x8x4_t <b><b>vld4q_lane_s16</b></b> (int16_t const * ptr, int16x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8H <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_s32" type="checkbox"><label for="vld4_lane_s32"><div>int32x2x4_t <b><b>vld4_lane_s32</b></b> (int32_t const * ptr, int32x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2S <br />
+src.val[2] &rarr; Vt3.2S <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &le; lane &le; 1 </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_s32" type="checkbox"><label for="vld4q_lane_s32"><div>int32x4x4_t <b><b>vld4q_lane_s32</b></b> (int32_t const * ptr, int32x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4S <br />
+src.val[2] &rarr; Vt3.4S <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_u16" type="checkbox"><label for="vld4_lane_u16"><div>uint16x4x4_t <b><b>vld4_lane_u16</b></b> (uint16_t const * ptr, uint16x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4H <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_u16" type="checkbox"><label for="vld4q_lane_u16"><div>uint16x8x4_t <b><b>vld4q_lane_u16</b></b> (uint16_t const * ptr, uint16x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8H <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_u32" type="checkbox"><label for="vld4_lane_u32"><div>uint32x2x4_t <b><b>vld4_lane_u32</b></b> (uint32_t const * ptr, uint32x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2S <br />
+src.val[2] &rarr; Vt3.2S <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_u32" type="checkbox"><label for="vld4q_lane_u32"><div>uint32x4x4_t <b><b>vld4q_lane_u32</b></b> (uint32_t const * ptr, uint32x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4S <br />
+src.val[2] &rarr; Vt3.4S <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_f16" type="checkbox"><label for="vld4_lane_f16"><div>float16x4x4_t <b><b>vld4_lane_f16</b></b> (float16_t const * ptr, float16x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4H <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_f16" type="checkbox"><label for="vld4q_lane_f16"><div>float16x8x4_t <b><b>vld4q_lane_f16</b></b> (float16_t const * ptr, float16x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8H <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_f32" type="checkbox"><label for="vld4_lane_f32"><div>float32x2x4_t <b><b>vld4_lane_f32</b></b> (float32_t const * ptr, float32x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2S <br />
+src.val[2] &rarr; Vt3.2S <br />
+src.val[1] &rarr; Vt2.2S <br />
+src.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_f32" type="checkbox"><label for="vld4q_lane_f32"><div>float32x4x4_t <b><b>vld4q_lane_f32</b></b> (float32_t const * ptr, float32x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4S <br />
+src.val[2] &rarr; Vt3.4S <br />
+src.val[1] &rarr; Vt2.4S <br />
+src.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_p16" type="checkbox"><label for="vld4_lane_p16"><div>poly16x4x4_t <b><b>vld4_lane_p16</b></b> (poly16_t const * ptr, poly16x4x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.4H <br />
+src.val[2] &rarr; Vt3.4H <br />
+src.val[1] &rarr; Vt2.4H <br />
+src.val[0] &rarr; Vt.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_p16" type="checkbox"><label for="vld4q_lane_p16"><div>poly16x8x4_t <b><b>vld4q_lane_p16</b></b> (poly16_t const * ptr, poly16x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8H <br />
+src.val[2] &rarr; Vt3.8H <br />
+src.val[1] &rarr; Vt2.8H <br />
+src.val[0] &rarr; Vt.8H <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_s8" type="checkbox"><label for="vld4_lane_s8"><div>int8x8x4_t <b><b>vld4_lane_s8</b></b> (int8_t const * ptr, int8x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8B <br />
+src.val[2] &rarr; Vt3.8B <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_u8" type="checkbox"><label for="vld4_lane_u8"><div>uint8x8x4_t <b><b>vld4_lane_u8</b></b> (uint8_t const * ptr, uint8x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8B <br />
+src.val[2] &rarr; Vt3.8B <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_p8" type="checkbox"><label for="vld4_lane_p8"><div>poly8x8x4_t <b><b>vld4_lane_p8</b></b> (poly8_t const * ptr, poly8x8x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.8B <br />
+src.val[2] &rarr; Vt3.8B <br />
+src.val[1] &rarr; Vt2.8B <br />
+src.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_s8" type="checkbox"><label for="vld4q_lane_s8"><div>int8x16x4_t <b><b>vld4q_lane_s8</b></b> (int8_t const * ptr, int8x16x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.16B <br />
+src.val[2] &rarr; Vt3.16B <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_u8" type="checkbox"><label for="vld4q_lane_u8"><div>uint8x16x4_t <b><b>vld4q_lane_u8</b></b> (uint8_t const * ptr, uint8x16x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.16B <br />
+src.val[2] &rarr; Vt3.16B <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_p8" type="checkbox"><label for="vld4q_lane_p8"><div>poly8x16x4_t <b><b>vld4q_lane_p8</b></b> (poly8_t const * ptr, poly8x16x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.16B <br />
+src.val[2] &rarr; Vt3.16B <br />
+src.val[1] &rarr; Vt2.16B <br />
+src.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_s64" type="checkbox"><label for="vld4_lane_s64"><div>int64x1x4_t <b><b>vld4_lane_s64</b></b> (int64_t const * ptr, int64x1x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.1D <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_s64" type="checkbox"><label for="vld4q_lane_s64"><div>int64x2x4_t <b><b>vld4q_lane_s64</b></b> (int64_t const * ptr, int64x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2D <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_u64" type="checkbox"><label for="vld4_lane_u64"><div>uint64x1x4_t <b><b>vld4_lane_u64</b></b> (uint64_t const * ptr, uint64x1x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.1D <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_u64" type="checkbox"><label for="vld4q_lane_u64"><div>uint64x2x4_t <b><b>vld4q_lane_u64</b></b> (uint64_t const * ptr, uint64x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2D <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_p64" type="checkbox"><label for="vld4_lane_p64"><div>poly64x1x4_t <b><b>vld4_lane_p64</b></b> (poly64_t const * ptr, poly64x1x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.1D <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_p64" type="checkbox"><label for="vld4q_lane_p64"><div>poly64x2x4_t <b><b>vld4q_lane_p64</b></b> (poly64_t const * ptr, poly64x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2D <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4_lane_f64" type="checkbox"><label for="vld4_lane_f64"><div>float64x1x4_t <b><b>vld4_lane_f64</b></b> (float64_t const * ptr, float64x1x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.1D <br />
+src.val[2] &rarr; Vt3.1D <br />
+src.val[1] &rarr; Vt2.1D <br />
+src.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld4q_lane_f64" type="checkbox"><label for="vld4q_lane_f64"><div>float64x2x4_t <b><b>vld4q_lane_f64</b></b> (float64_t const * ptr, float64x2x4_t src, const int lane)<span class="right">Load single 4-element structure to one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&amp;FP registers without affecting the other bits of the registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld4-single-structure-load-single-4-element-structure-to-one-lane-of-four-registers">LD4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+src.val[3] &rarr; Vt4.2D <br />
+src.val[2] &rarr; Vt3.2D <br />
+src.val[1] &rarr; Vt2.2D <br />
+src.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_s8" type="checkbox"><label for="vst2_lane_s8"><div>void <b><b>vst2_lane_s8</b></b> (int8_t * ptr, int8x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_u8" type="checkbox"><label for="vst2_lane_u8"><div>void <b><b>vst2_lane_u8</b></b> (uint8_t * ptr, uint8x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_p8" type="checkbox"><label for="vst2_lane_p8"><div>void <b><b>vst2_lane_p8</b></b> (poly8_t * ptr, poly8x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_s8" type="checkbox"><label for="vst3_lane_s8"><div>void <b><b>vst3_lane_s8</b></b> (int8_t * ptr, int8x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_u8" type="checkbox"><label for="vst3_lane_u8"><div>void <b><b>vst3_lane_u8</b></b> (uint8_t * ptr, uint8x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_p8" type="checkbox"><label for="vst3_lane_p8"><div>void <b><b>vst3_lane_p8</b></b> (poly8_t * ptr, poly8x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_s8" type="checkbox"><label for="vst4_lane_s8"><div>void <b><b>vst4_lane_s8</b></b> (int8_t * ptr, int8x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_u8" type="checkbox"><label for="vst4_lane_u8"><div>void <b><b>vst4_lane_u8</b></b> (uint8_t * ptr, uint8x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_p8" type="checkbox"><label for="vst4_lane_p8"><div>void <b><b>vst4_lane_p8</b></b> (poly8_t * ptr, poly8x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_s16" type="checkbox"><label for="vst2_lane_s16"><div>void <b><b>vst2_lane_s16</b></b> (int16_t * ptr, int16x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_s16" type="checkbox"><label for="vst2q_lane_s16"><div>void <b><b>vst2q_lane_s16</b></b> (int16_t * ptr, int16x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_s32" type="checkbox"><label for="vst2_lane_s32"><div>void <b><b>vst2_lane_s32</b></b> (int32_t * ptr, int32x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_s32" type="checkbox"><label for="vst2q_lane_s32"><div>void <b><b>vst2q_lane_s32</b></b> (int32_t * ptr, int32x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_u16" type="checkbox"><label for="vst2_lane_u16"><div>void <b><b>vst2_lane_u16</b></b> (uint16_t * ptr, uint16x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &le; lane &le; 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_u16" type="checkbox"><label for="vst2q_lane_u16"><div>void <b><b>vst2q_lane_u16</b></b> (uint16_t * ptr, uint16x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &le; lane &le; 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_u32" type="checkbox"><label for="vst2_lane_u32"><div>void <b><b>vst2_lane_u32</b></b> (uint32_t * ptr, uint32x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_u32" type="checkbox"><label for="vst2q_lane_u32"><div>void <b><b>vst2q_lane_u32</b></b> (uint32_t * ptr, uint32x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_f16" type="checkbox"><label for="vst2_lane_f16"><div>void <b><b>vst2_lane_f16</b></b> (float16_t * ptr, float16x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_f16" type="checkbox"><label for="vst2q_lane_f16"><div>void <b><b>vst2q_lane_f16</b></b> (float16_t * ptr, float16x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_f32" type="checkbox"><label for="vst2_lane_f32"><div>void <b><b>vst2_lane_f32</b></b> (float32_t * ptr, float32x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_f32" type="checkbox"><label for="vst2q_lane_f32"><div>void <b><b>vst2q_lane_f32</b></b> (float32_t * ptr, float32x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.s - Vt2.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_p16" type="checkbox"><label for="vst2_lane_p16"><div>void <b><b>vst2_lane_p16</b></b> (poly16_t * ptr, poly16x4x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_p16" type="checkbox"><label for="vst2q_lane_p16"><div>void <b><b>vst2q_lane_p16</b></b> (poly16_t * ptr, poly16x8x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.h - Vt2.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_s8" type="checkbox"><label for="vst2q_lane_s8"><div>void <b><b>vst2q_lane_s8</b></b> (int8_t * ptr, int8x16x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_u8" type="checkbox"><label for="vst2q_lane_u8"><div>void <b><b>vst2q_lane_u8</b></b> (uint8_t * ptr, uint8x16x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_p8" type="checkbox"><label for="vst2q_lane_p8"><div>void <b><b>vst2q_lane_p8</b></b> (poly8_t * ptr, poly8x16x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.b - Vt2.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_s64" type="checkbox"><label for="vst2_lane_s64"><div>void <b><b>vst2_lane_s64</b></b> (int64_t * ptr, int64x1x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_s64" type="checkbox"><label for="vst2q_lane_s64"><div>void <b><b>vst2q_lane_s64</b></b> (int64_t * ptr, int64x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_u64" type="checkbox"><label for="vst2_lane_u64"><div>void <b><b>vst2_lane_u64</b></b> (uint64_t * ptr, uint64x1x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_u64" type="checkbox"><label for="vst2q_lane_u64"><div>void <b><b>vst2q_lane_u64</b></b> (uint64_t * ptr, uint64x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_p64" type="checkbox"><label for="vst2_lane_p64"><div>void <b><b>vst2_lane_p64</b></b> (poly64_t * ptr, poly64x1x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_p64" type="checkbox"><label for="vst2q_lane_p64"><div>void <b><b>vst2q_lane_p64</b></b> (poly64_t * ptr, poly64x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2_lane_f64" type="checkbox"><label for="vst2_lane_f64"><div>void <b><b>vst2_lane_f64</b></b> (float64_t * ptr, float64x1x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst2q_lane_f64" type="checkbox"><label for="vst2q_lane_f64"><div>void <b><b>vst2q_lane_f64</b></b> (float64_t * ptr, float64x2x2_t val, const int lane)<span class="right">Store single 2-element structure from one lane of two registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st2-single-structure-store-single-2-element-structure-from-one-lane-of-two-registers">ST2</a> {Vt.d - Vt2.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_s16" type="checkbox"><label for="vst3_lane_s16"><div>void <b><b>vst3_lane_s16</b></b> (int16_t * ptr, int16x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_s16" type="checkbox"><label for="vst3q_lane_s16"><div>void <b><b>vst3q_lane_s16</b></b> (int16_t * ptr, int16x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_s32" type="checkbox"><label for="vst3_lane_s32"><div>void <b><b>vst3_lane_s32</b></b> (int32_t * ptr, int32x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_s32" type="checkbox"><label for="vst3q_lane_s32"><div>void <b><b>vst3q_lane_s32</b></b> (int32_t * ptr, int32x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_u16" type="checkbox"><label for="vst3_lane_u16"><div>void <b><b>vst3_lane_u16</b></b> (uint16_t * ptr, uint16x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_u16" type="checkbox"><label for="vst3q_lane_u16"><div>void <b><b>vst3q_lane_u16</b></b> (uint16_t * ptr, uint16x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_u32" type="checkbox"><label for="vst3_lane_u32"><div>void <b><b>vst3_lane_u32</b></b> (uint32_t * ptr, uint32x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_u32" type="checkbox"><label for="vst3q_lane_u32"><div>void <b><b>vst3q_lane_u32</b></b> (uint32_t * ptr, uint32x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_f16" type="checkbox"><label for="vst3_lane_f16"><div>void <b><b>vst3_lane_f16</b></b> (float16_t * ptr, float16x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_f16" type="checkbox"><label for="vst3q_lane_f16"><div>void <b><b>vst3q_lane_f16</b></b> (float16_t * ptr, float16x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_f32" type="checkbox"><label for="vst3_lane_f32"><div>void <b><b>vst3_lane_f32</b></b> (float32_t * ptr, float32x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_f32" type="checkbox"><label for="vst3q_lane_f32"><div>void <b><b>vst3q_lane_f32</b></b> (float32_t * ptr, float32x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.s - Vt3.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_p16" type="checkbox"><label for="vst3_lane_p16"><div>void <b><b>vst3_lane_p16</b></b> (poly16_t * ptr, poly16x4x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_p16" type="checkbox"><label for="vst3q_lane_p16"><div>void <b><b>vst3q_lane_p16</b></b> (poly16_t * ptr, poly16x8x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.h - Vt3.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_s8" type="checkbox"><label for="vst3q_lane_s8"><div>void <b><b>vst3q_lane_s8</b></b> (int8_t * ptr, int8x16x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_u8" type="checkbox"><label for="vst3q_lane_u8"><div>void <b><b>vst3q_lane_u8</b></b> (uint8_t * ptr, uint8x16x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_p8" type="checkbox"><label for="vst3q_lane_p8"><div>void <b><b>vst3q_lane_p8</b></b> (poly8_t * ptr, poly8x16x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.b - Vt3.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_s64" type="checkbox"><label for="vst3_lane_s64"><div>void <b><b>vst3_lane_s64</b></b> (int64_t * ptr, int64x1x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_s64" type="checkbox"><label for="vst3q_lane_s64"><div>void <b><b>vst3q_lane_s64</b></b> (int64_t * ptr, int64x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_u64" type="checkbox"><label for="vst3_lane_u64"><div>void <b><b>vst3_lane_u64</b></b> (uint64_t * ptr, uint64x1x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_u64" type="checkbox"><label for="vst3q_lane_u64"><div>void <b><b>vst3q_lane_u64</b></b> (uint64_t * ptr, uint64x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_p64" type="checkbox"><label for="vst3_lane_p64"><div>void <b><b>vst3_lane_p64</b></b> (poly64_t * ptr, poly64x1x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_p64" type="checkbox"><label for="vst3q_lane_p64"><div>void <b><b>vst3q_lane_p64</b></b> (poly64_t * ptr, poly64x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3_lane_f64" type="checkbox"><label for="vst3_lane_f64"><div>void <b><b>vst3_lane_f64</b></b> (float64_t * ptr, float64x1x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst3q_lane_f64" type="checkbox"><label for="vst3q_lane_f64"><div>void <b><b>vst3q_lane_f64</b></b> (float64_t * ptr, float64x2x3_t val, const int lane)<span class="right">Store single 3-element structure from one lane of three registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st3-single-structure-store-single-3-element-structure-from-one-lane-of-three-registers">ST3</a> {Vt.d - Vt3.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_s16" type="checkbox"><label for="vst4_lane_s16"><div>void <b><b>vst4_lane_s16</b></b> (int16_t * ptr, int16x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_s16" type="checkbox"><label for="vst4q_lane_s16"><div>void <b><b>vst4q_lane_s16</b></b> (int16_t * ptr, int16x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_s32" type="checkbox"><label for="vst4_lane_s32"><div>void <b><b>vst4_lane_s32</b></b> (int32_t * ptr, int32x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_s32" type="checkbox"><label for="vst4q_lane_s32"><div>void <b><b>vst4q_lane_s32</b></b> (int32_t * ptr, int32x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_u16" type="checkbox"><label for="vst4_lane_u16"><div>void <b><b>vst4_lane_u16</b></b> (uint16_t * ptr, uint16x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_u16" type="checkbox"><label for="vst4q_lane_u16"><div>void <b><b>vst4q_lane_u16</b></b> (uint16_t * ptr, uint16x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_u32" type="checkbox"><label for="vst4_lane_u32"><div>void <b><b>vst4_lane_u32</b></b> (uint32_t * ptr, uint32x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_u32" type="checkbox"><label for="vst4q_lane_u32"><div>void <b><b>vst4q_lane_u32</b></b> (uint32_t * ptr, uint32x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_f16" type="checkbox"><label for="vst4_lane_f16"><div>void <b><b>vst4_lane_f16</b></b> (float16_t * ptr, float16x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_f16" type="checkbox"><label for="vst4q_lane_f16"><div>void <b><b>vst4q_lane_f16</b></b> (float16_t * ptr, float16x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_f32" type="checkbox"><label for="vst4_lane_f32"><div>void <b><b>vst4_lane_f32</b></b> (float32_t * ptr, float32x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_f32" type="checkbox"><label for="vst4q_lane_f32"><div>void <b><b>vst4q_lane_f32</b></b> (float32_t * ptr, float32x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.s - Vt4.s}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_p16" type="checkbox"><label for="vst4_lane_p16"><div>void <b><b>vst4_lane_p16</b></b> (poly16_t * ptr, poly16x4x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_p16" type="checkbox"><label for="vst4q_lane_p16"><div>void <b><b>vst4q_lane_p16</b></b> (poly16_t * ptr, poly16x8x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.h - Vt4.h}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_s8" type="checkbox"><label for="vst4q_lane_s8"><div>void <b><b>vst4q_lane_s8</b></b> (int8_t * ptr, int8x16x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_u8" type="checkbox"><label for="vst4q_lane_u8"><div>void <b><b>vst4q_lane_u8</b></b> (uint8_t * ptr, uint8x16x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_p8" type="checkbox"><label for="vst4q_lane_p8"><div>void <b><b>vst4q_lane_p8</b></b> (poly8_t * ptr, poly8x16x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.b - Vt4.b}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_s64" type="checkbox"><label for="vst4_lane_s64"><div>void <b><b>vst4_lane_s64</b></b> (int64_t * ptr, int64x1x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_s64" type="checkbox"><label for="vst4q_lane_s64"><div>void <b><b>vst4q_lane_s64</b></b> (int64_t * ptr, int64x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_u64" type="checkbox"><label for="vst4_lane_u64"><div>void <b><b>vst4_lane_u64</b></b> (uint64_t * ptr, uint64x1x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_u64" type="checkbox"><label for="vst4q_lane_u64"><div>void <b><b>vst4q_lane_u64</b></b> (uint64_t * ptr, uint64x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_p64" type="checkbox"><label for="vst4_lane_p64"><div>void <b><b>vst4_lane_p64</b></b> (poly64_t * ptr, poly64x1x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_p64" type="checkbox"><label for="vst4q_lane_p64"><div>void <b><b>vst4q_lane_p64</b></b> (poly64_t * ptr, poly64x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4_lane_f64" type="checkbox"><label for="vst4_lane_f64"><div>void <b><b>vst4_lane_f64</b></b> (float64_t * ptr, float64x1x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D <br />
+0 &le; lane &le; 0 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst4q_lane_f64" type="checkbox"><label for="vst4q_lane_f64"><div>void <b><b>vst4q_lane_f64</b></b> (float64_t * ptr, float64x2x4_t val, const int lane)<span class="right">Store single 4-element structure from one lane of four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st4-single-structure-store-single-4-element-structure-from-one-lane-of-four-registers">ST4</a> {Vt.d - Vt4.d}[lane],[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D <br />
+0 &le; lane &le; 1 </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s8_x2" type="checkbox"><label for="vst1_s8_x2"><div>void <b><b>vst1_s8_x2</b></b> (int8_t * ptr, int8x8x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s8_x2" type="checkbox"><label for="vst1q_s8_x2"><div>void <b><b>vst1q_s8_x2</b></b> (int8_t * ptr, int8x16x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s16_x2" type="checkbox"><label for="vst1_s16_x2"><div>void <b><b>vst1_s16_x2</b></b> (int16_t * ptr, int16x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s16_x2" type="checkbox"><label for="vst1q_s16_x2"><div>void <b><b>vst1q_s16_x2</b></b> (int16_t * ptr, int16x8x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s32_x2" type="checkbox"><label for="vst1_s32_x2"><div>void <b><b>vst1_s32_x2</b></b> (int32_t * ptr, int32x2x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s32_x2" type="checkbox"><label for="vst1q_s32_x2"><div>void <b><b>vst1q_s32_x2</b></b> (int32_t * ptr, int32x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u8_x2" type="checkbox"><label for="vst1_u8_x2"><div>void <b><b>vst1_u8_x2</b></b> (uint8_t * ptr, uint8x8x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u8_x2" type="checkbox"><label for="vst1q_u8_x2"><div>void <b><b>vst1q_u8_x2</b></b> (uint8_t * ptr, uint8x16x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u16_x2" type="checkbox"><label for="vst1_u16_x2"><div>void <b><b>vst1_u16_x2</b></b> (uint16_t * ptr, uint16x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u16_x2" type="checkbox"><label for="vst1q_u16_x2"><div>void <b><b>vst1q_u16_x2</b></b> (uint16_t * ptr, uint16x8x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u32_x2" type="checkbox"><label for="vst1_u32_x2"><div>void <b><b>vst1_u32_x2</b></b> (uint32_t * ptr, uint32x2x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u32_x2" type="checkbox"><label for="vst1q_u32_x2"><div>void <b><b>vst1q_u32_x2</b></b> (uint32_t * ptr, uint32x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f16_x2" type="checkbox"><label for="vst1_f16_x2"><div>void <b><b>vst1_f16_x2</b></b> (float16_t * ptr, float16x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f16_x2" type="checkbox"><label for="vst1q_f16_x2"><div>void <b><b>vst1q_f16_x2</b></b> (float16_t * ptr, float16x8x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f32_x2" type="checkbox"><label for="vst1_f32_x2"><div>void <b><b>vst1_f32_x2</b></b> (float32_t * ptr, float32x2x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f32_x2" type="checkbox"><label for="vst1q_f32_x2"><div>void <b><b>vst1q_f32_x2</b></b> (float32_t * ptr, float32x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p8_x2" type="checkbox"><label for="vst1_p8_x2"><div>void <b><b>vst1_p8_x2</b></b> (poly8_t * ptr, poly8x8x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p8_x2" type="checkbox"><label for="vst1q_p8_x2"><div>void <b><b>vst1q_p8_x2</b></b> (poly8_t * ptr, poly8x16x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p16_x2" type="checkbox"><label for="vst1_p16_x2"><div>void <b><b>vst1_p16_x2</b></b> (poly16_t * ptr, poly16x4x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p16_x2" type="checkbox"><label for="vst1q_p16_x2"><div>void <b><b>vst1q_p16_x2</b></b> (poly16_t * ptr, poly16x8x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s64_x2" type="checkbox"><label for="vst1_s64_x2"><div>void <b><b>vst1_s64_x2</b></b> (int64_t * ptr, int64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u64_x2" type="checkbox"><label for="vst1_u64_x2"><div>void <b><b>vst1_u64_x2</b></b> (uint64_t * ptr, uint64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p64_x2" type="checkbox"><label for="vst1_p64_x2"><div>void <b><b>vst1_p64_x2</b></b> (poly64_t * ptr, poly64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s64_x2" type="checkbox"><label for="vst1q_s64_x2"><div>void <b><b>vst1q_s64_x2</b></b> (int64_t * ptr, int64x2x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u64_x2" type="checkbox"><label for="vst1q_u64_x2"><div>void <b><b>vst1q_u64_x2</b></b> (uint64_t * ptr, uint64x2x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p64_x2" type="checkbox"><label for="vst1q_p64_x2"><div>void <b><b>vst1q_p64_x2</b></b> (poly64_t * ptr, poly64x2x2_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f64_x2" type="checkbox"><label for="vst1_f64_x2"><div>void <b><b>vst1_f64_x2</b></b> (float64_t * ptr, float64x1x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f64_x2" type="checkbox"><label for="vst1q_f64_x2"><div>void <b><b>vst1q_f64_x2</b></b> (float64_t * ptr, float64x2x2_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s8_x3" type="checkbox"><label for="vst1_s8_x3"><div>void <b><b>vst1_s8_x3</b></b> (int8_t * ptr, int8x8x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s8_x3" type="checkbox"><label for="vst1q_s8_x3"><div>void <b><b>vst1q_s8_x3</b></b> (int8_t * ptr, int8x16x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s16_x3" type="checkbox"><label for="vst1_s16_x3"><div>void <b><b>vst1_s16_x3</b></b> (int16_t * ptr, int16x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s16_x3" type="checkbox"><label for="vst1q_s16_x3"><div>void <b><b>vst1q_s16_x3</b></b> (int16_t * ptr, int16x8x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s32_x3" type="checkbox"><label for="vst1_s32_x3"><div>void <b><b>vst1_s32_x3</b></b> (int32_t * ptr, int32x2x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s32_x3" type="checkbox"><label for="vst1q_s32_x3"><div>void <b><b>vst1q_s32_x3</b></b> (int32_t * ptr, int32x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u8_x3" type="checkbox"><label for="vst1_u8_x3"><div>void <b><b>vst1_u8_x3</b></b> (uint8_t * ptr, uint8x8x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u8_x3" type="checkbox"><label for="vst1q_u8_x3"><div>void <b><b>vst1q_u8_x3</b></b> (uint8_t * ptr, uint8x16x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u16_x3" type="checkbox"><label for="vst1_u16_x3"><div>void <b><b>vst1_u16_x3</b></b> (uint16_t * ptr, uint16x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u16_x3" type="checkbox"><label for="vst1q_u16_x3"><div>void <b><b>vst1q_u16_x3</b></b> (uint16_t * ptr, uint16x8x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u32_x3" type="checkbox"><label for="vst1_u32_x3"><div>void <b><b>vst1_u32_x3</b></b> (uint32_t * ptr, uint32x2x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u32_x3" type="checkbox"><label for="vst1q_u32_x3"><div>void <b><b>vst1q_u32_x3</b></b> (uint32_t * ptr, uint32x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f16_x3" type="checkbox"><label for="vst1_f16_x3"><div>void <b><b>vst1_f16_x3</b></b> (float16_t * ptr, float16x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f16_x3" type="checkbox"><label for="vst1q_f16_x3"><div>void <b><b>vst1q_f16_x3</b></b> (float16_t * ptr, float16x8x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f32_x3" type="checkbox"><label for="vst1_f32_x3"><div>void <b><b>vst1_f32_x3</b></b> (float32_t * ptr, float32x2x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f32_x3" type="checkbox"><label for="vst1q_f32_x3"><div>void <b><b>vst1q_f32_x3</b></b> (float32_t * ptr, float32x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p8_x3" type="checkbox"><label for="vst1_p8_x3"><div>void <b><b>vst1_p8_x3</b></b> (poly8_t * ptr, poly8x8x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p8_x3" type="checkbox"><label for="vst1q_p8_x3"><div>void <b><b>vst1q_p8_x3</b></b> (poly8_t * ptr, poly8x16x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p16_x3" type="checkbox"><label for="vst1_p16_x3"><div>void <b><b>vst1_p16_x3</b></b> (poly16_t * ptr, poly16x4x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p16_x3" type="checkbox"><label for="vst1q_p16_x3"><div>void <b><b>vst1q_p16_x3</b></b> (poly16_t * ptr, poly16x8x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s64_x3" type="checkbox"><label for="vst1_s64_x3"><div>void <b><b>vst1_s64_x3</b></b> (int64_t * ptr, int64x1x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u64_x3" type="checkbox"><label for="vst1_u64_x3"><div>void <b><b>vst1_u64_x3</b></b> (uint64_t * ptr, uint64x1x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p64_x3" type="checkbox"><label for="vst1_p64_x3"><div>void <b><b>vst1_p64_x3</b></b> (poly64_t * ptr, poly64x1x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s64_x3" type="checkbox"><label for="vst1q_s64_x3"><div>void <b><b>vst1q_s64_x3</b></b> (int64_t * ptr, int64x2x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u64_x3" type="checkbox"><label for="vst1q_u64_x3"><div>void <b><b>vst1q_u64_x3</b></b> (uint64_t * ptr, uint64x2x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p64_x3" type="checkbox"><label for="vst1q_p64_x3"><div>void <b><b>vst1q_p64_x3</b></b> (poly64_t * ptr, poly64x2x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f64_x3" type="checkbox"><label for="vst1_f64_x3"><div>void <b><b>vst1_f64_x3</b></b> (float64_t * ptr, float64x1x3_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f64_x3" type="checkbox"><label for="vst1q_f64_x3"><div>void <b><b>vst1q_f64_x3</b></b> (float64_t * ptr, float64x2x3_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s8_x4" type="checkbox"><label for="vst1_s8_x4"><div>void <b><b>vst1_s8_x4</b></b> (int8_t * ptr, int8x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s8_x4" type="checkbox"><label for="vst1q_s8_x4"><div>void <b><b>vst1q_s8_x4</b></b> (int8_t * ptr, int8x16x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s16_x4" type="checkbox"><label for="vst1_s16_x4"><div>void <b><b>vst1_s16_x4</b></b> (int16_t * ptr, int16x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s16_x4" type="checkbox"><label for="vst1q_s16_x4"><div>void <b><b>vst1q_s16_x4</b></b> (int16_t * ptr, int16x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s32_x4" type="checkbox"><label for="vst1_s32_x4"><div>void <b><b>vst1_s32_x4</b></b> (int32_t * ptr, int32x2x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s32_x4" type="checkbox"><label for="vst1q_s32_x4"><div>void <b><b>vst1q_s32_x4</b></b> (int32_t * ptr, int32x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u8_x4" type="checkbox"><label for="vst1_u8_x4"><div>void <b><b>vst1_u8_x4</b></b> (uint8_t * ptr, uint8x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u8_x4" type="checkbox"><label for="vst1q_u8_x4"><div>void <b><b>vst1q_u8_x4</b></b> (uint8_t * ptr, uint8x16x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u16_x4" type="checkbox"><label for="vst1_u16_x4"><div>void <b><b>vst1_u16_x4</b></b> (uint16_t * ptr, uint16x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u16_x4" type="checkbox"><label for="vst1q_u16_x4"><div>void <b><b>vst1q_u16_x4</b></b> (uint16_t * ptr, uint16x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u32_x4" type="checkbox"><label for="vst1_u32_x4"><div>void <b><b>vst1_u32_x4</b></b> (uint32_t * ptr, uint32x2x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u32_x4" type="checkbox"><label for="vst1q_u32_x4"><div>void <b><b>vst1q_u32_x4</b></b> (uint32_t * ptr, uint32x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f16_x4" type="checkbox"><label for="vst1_f16_x4"><div>void <b><b>vst1_f16_x4</b></b> (float16_t * ptr, float16x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f16_x4" type="checkbox"><label for="vst1q_f16_x4"><div>void <b><b>vst1q_f16_x4</b></b> (float16_t * ptr, float16x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f32_x4" type="checkbox"><label for="vst1_f32_x4"><div>void <b><b>vst1_f32_x4</b></b> (float32_t * ptr, float32x2x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2S <br />
+val.val[2] &rarr; Vt3.2S <br />
+val.val[1] &rarr; Vt2.2S <br />
+val.val[0] &rarr; Vt.2S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f32_x4" type="checkbox"><label for="vst1q_f32_x4"><div>void <b><b>vst1q_f32_x4</b></b> (float32_t * ptr, float32x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4S <br />
+val.val[2] &rarr; Vt3.4S <br />
+val.val[1] &rarr; Vt2.4S <br />
+val.val[0] &rarr; Vt.4S </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p8_x4" type="checkbox"><label for="vst1_p8_x4"><div>void <b><b>vst1_p8_x4</b></b> (poly8_t * ptr, poly8x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8B <br />
+val.val[2] &rarr; Vt3.8B <br />
+val.val[1] &rarr; Vt2.8B <br />
+val.val[0] &rarr; Vt.8B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p8_x4" type="checkbox"><label for="vst1q_p8_x4"><div>void <b><b>vst1q_p8_x4</b></b> (poly8_t * ptr, poly8x16x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.16B <br />
+val.val[2] &rarr; Vt3.16B <br />
+val.val[1] &rarr; Vt2.16B <br />
+val.val[0] &rarr; Vt.16B </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p16_x4" type="checkbox"><label for="vst1_p16_x4"><div>void <b><b>vst1_p16_x4</b></b> (poly16_t * ptr, poly16x4x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.4H <br />
+val.val[2] &rarr; Vt3.4H <br />
+val.val[1] &rarr; Vt2.4H <br />
+val.val[0] &rarr; Vt.4H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p16_x4" type="checkbox"><label for="vst1q_p16_x4"><div>void <b><b>vst1q_p16_x4</b></b> (poly16_t * ptr, poly16x8x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.8H <br />
+val.val[2] &rarr; Vt3.8H <br />
+val.val[1] &rarr; Vt2.8H <br />
+val.val[0] &rarr; Vt.8H </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_s64_x4" type="checkbox"><label for="vst1_s64_x4"><div>void <b><b>vst1_s64_x4</b></b> (int64_t * ptr, int64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_u64_x4" type="checkbox"><label for="vst1_u64_x4"><div>void <b><b>vst1_u64_x4</b></b> (uint64_t * ptr, uint64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_p64_x4" type="checkbox"><label for="vst1_p64_x4"><div>void <b><b>vst1_p64_x4</b></b> (poly64_t * ptr, poly64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_s64_x4" type="checkbox"><label for="vst1q_s64_x4"><div>void <b><b>vst1q_s64_x4</b></b> (int64_t * ptr, int64x2x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_u64_x4" type="checkbox"><label for="vst1q_u64_x4"><div>void <b><b>vst1q_u64_x4</b></b> (uint64_t * ptr, uint64x2x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_p64_x4" type="checkbox"><label for="vst1q_p64_x4"><div>void <b><b>vst1q_p64_x4</b></b> (poly64_t * ptr, poly64x2x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vst1_f64_x4" type="checkbox"><label for="vst1_f64_x4"><div>void <b><b>vst1_f64_x4</b></b> (float64_t * ptr, float64x1x4_t val)<span class="right">Store a single-element structure from one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&amp;FP register to memory.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.1D <br />
+val.val[2] &rarr; Vt3.1D <br />
+val.val[1] &rarr; Vt2.1D <br />
+val.val[0] &rarr; Vt.1D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vst1q_f64_x4" type="checkbox"><label for="vst1q_f64_x4"><div>void <b><b>vst1q_f64_x4</b></b> (float64_t * ptr, float64x2x4_t val)<span class="right">Store multiple single-element structures from one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&amp;FP registers, without interleaving. Every element of each register is stored.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/st1-single-structure-store-a-single-element-structure-from-one-lane-of-one-register">ST1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val.val[3] &rarr; Vt4.2D <br />
+val.val[2] &rarr; Vt3.2D <br />
+val.val[1] &rarr; Vt2.2D <br />
+val.val[0] &rarr; Vt.2D </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s8_x2" type="checkbox"><label for="vld1_s8_x2"><div>int8x8x2_t <b><b>vld1_s8_x2</b></b> (int8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s8_x2" type="checkbox"><label for="vld1q_s8_x2"><div>int8x16x2_t <b><b>vld1q_s8_x2</b></b> (int8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s16_x2" type="checkbox"><label for="vld1_s16_x2"><div>int16x4x2_t <b><b>vld1_s16_x2</b></b> (int16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s16_x2" type="checkbox"><label for="vld1q_s16_x2"><div>int16x8x2_t <b><b>vld1q_s16_x2</b></b> (int16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s32_x2" type="checkbox"><label for="vld1_s32_x2"><div>int32x2x2_t <b><b>vld1_s32_x2</b></b> (int32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s32_x2" type="checkbox"><label for="vld1q_s32_x2"><div>int32x4x2_t <b><b>vld1q_s32_x2</b></b> (int32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u8_x2" type="checkbox"><label for="vld1_u8_x2"><div>uint8x8x2_t <b><b>vld1_u8_x2</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u8_x2" type="checkbox"><label for="vld1q_u8_x2"><div>uint8x16x2_t <b><b>vld1q_u8_x2</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u16_x2" type="checkbox"><label for="vld1_u16_x2"><div>uint16x4x2_t <b><b>vld1_u16_x2</b></b> (uint16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u16_x2" type="checkbox"><label for="vld1q_u16_x2"><div>uint16x8x2_t <b><b>vld1q_u16_x2</b></b> (uint16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u32_x2" type="checkbox"><label for="vld1_u32_x2"><div>uint32x2x2_t <b><b>vld1_u32_x2</b></b> (uint32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u32_x2" type="checkbox"><label for="vld1q_u32_x2"><div>uint32x4x2_t <b><b>vld1q_u32_x2</b></b> (uint32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f16_x2" type="checkbox"><label for="vld1_f16_x2"><div>float16x4x2_t <b><b>vld1_f16_x2</b></b> (float16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f16_x2" type="checkbox"><label for="vld1q_f16_x2"><div>float16x8x2_t <b><b>vld1q_f16_x2</b></b> (float16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f32_x2" type="checkbox"><label for="vld1_f32_x2"><div>float32x2x2_t <b><b>vld1_f32_x2</b></b> (float32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt2.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f32_x2" type="checkbox"><label for="vld1q_f32_x2"><div>float32x4x2_t <b><b>vld1q_f32_x2</b></b> (float32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt2.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p8_x2" type="checkbox"><label for="vld1_p8_x2"><div>poly8x8x2_t <b><b>vld1_p8_x2</b></b> (poly8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt2.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p8_x2" type="checkbox"><label for="vld1q_p8_x2"><div>poly8x16x2_t <b><b>vld1q_p8_x2</b></b> (poly8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt2.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p16_x2" type="checkbox"><label for="vld1_p16_x2"><div>poly16x4x2_t <b><b>vld1_p16_x2</b></b> (poly16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt2.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p16_x2" type="checkbox"><label for="vld1q_p16_x2"><div>poly16x8x2_t <b><b>vld1q_p16_x2</b></b> (poly16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt2.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s64_x2" type="checkbox"><label for="vld1_s64_x2"><div>int64x1x2_t <b><b>vld1_s64_x2</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u64_x2" type="checkbox"><label for="vld1_u64_x2"><div>uint64x1x2_t <b><b>vld1_u64_x2</b></b> (uint64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p64_x2" type="checkbox"><label for="vld1_p64_x2"><div>poly64x1x2_t <b><b>vld1_p64_x2</b></b> (poly64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s64_x2" type="checkbox"><label for="vld1q_s64_x2"><div>int64x2x2_t <b><b>vld1q_s64_x2</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u64_x2" type="checkbox"><label for="vld1q_u64_x2"><div>uint64x2x2_t <b><b>vld1q_u64_x2</b></b> (uint64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p64_x2" type="checkbox"><label for="vld1q_p64_x2"><div>poly64x2x2_t <b><b>vld1q_p64_x2</b></b> (poly64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f64_x2" type="checkbox"><label for="vld1_f64_x2"><div>float64x1x2_t <b><b>vld1_f64_x2</b></b> (float64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt2.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f64_x2" type="checkbox"><label for="vld1q_f64_x2"><div>float64x2x2_t <b><b>vld1q_f64_x2</b></b> (float64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt2.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s8_x3" type="checkbox"><label for="vld1_s8_x3"><div>int8x8x3_t <b><b>vld1_s8_x3</b></b> (int8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s8_x3" type="checkbox"><label for="vld1q_s8_x3"><div>int8x16x3_t <b><b>vld1q_s8_x3</b></b> (int8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s16_x3" type="checkbox"><label for="vld1_s16_x3"><div>int16x4x3_t <b><b>vld1_s16_x3</b></b> (int16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s16_x3" type="checkbox"><label for="vld1q_s16_x3"><div>int16x8x3_t <b><b>vld1q_s16_x3</b></b> (int16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s32_x3" type="checkbox"><label for="vld1_s32_x3"><div>int32x2x3_t <b><b>vld1_s32_x3</b></b> (int32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s32_x3" type="checkbox"><label for="vld1q_s32_x3"><div>int32x4x3_t <b><b>vld1q_s32_x3</b></b> (int32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u8_x3" type="checkbox"><label for="vld1_u8_x3"><div>uint8x8x3_t <b><b>vld1_u8_x3</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u8_x3" type="checkbox"><label for="vld1q_u8_x3"><div>uint8x16x3_t <b><b>vld1q_u8_x3</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u16_x3" type="checkbox"><label for="vld1_u16_x3"><div>uint16x4x3_t <b><b>vld1_u16_x3</b></b> (uint16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u16_x3" type="checkbox"><label for="vld1q_u16_x3"><div>uint16x8x3_t <b><b>vld1q_u16_x3</b></b> (uint16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u32_x3" type="checkbox"><label for="vld1_u32_x3"><div>uint32x2x3_t <b><b>vld1_u32_x3</b></b> (uint32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u32_x3" type="checkbox"><label for="vld1q_u32_x3"><div>uint32x4x3_t <b><b>vld1q_u32_x3</b></b> (uint32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f16_x3" type="checkbox"><label for="vld1_f16_x3"><div>float16x4x3_t <b><b>vld1_f16_x3</b></b> (float16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f16_x3" type="checkbox"><label for="vld1q_f16_x3"><div>float16x8x3_t <b><b>vld1q_f16_x3</b></b> (float16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f32_x3" type="checkbox"><label for="vld1_f32_x3"><div>float32x2x3_t <b><b>vld1_f32_x3</b></b> (float32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt3.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f32_x3" type="checkbox"><label for="vld1q_f32_x3"><div>float32x4x3_t <b><b>vld1q_f32_x3</b></b> (float32_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt3.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p8_x3" type="checkbox"><label for="vld1_p8_x3"><div>poly8x8x3_t <b><b>vld1_p8_x3</b></b> (poly8_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt3.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p8_x3" type="checkbox"><label for="vld1q_p8_x3"><div>poly8x16x3_t <b><b>vld1q_p8_x3</b></b> (poly8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt3.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p16_x3" type="checkbox"><label for="vld1_p16_x3"><div>poly16x4x3_t <b><b>vld1_p16_x3</b></b> (poly16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt3.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p16_x3" type="checkbox"><label for="vld1q_p16_x3"><div>poly16x8x3_t <b><b>vld1q_p16_x3</b></b> (poly16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt3.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s64_x3" type="checkbox"><label for="vld1_s64_x3"><div>int64x1x3_t <b><b>vld1_s64_x3</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u64_x3" type="checkbox"><label for="vld1_u64_x3"><div>uint64x1x3_t <b><b>vld1_u64_x3</b></b> (uint64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p64_x3" type="checkbox"><label for="vld1_p64_x3"><div>poly64x1x3_t <b><b>vld1_p64_x3</b></b> (poly64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s64_x3" type="checkbox"><label for="vld1q_s64_x3"><div>int64x2x3_t <b><b>vld1q_s64_x3</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u64_x3" type="checkbox"><label for="vld1q_u64_x3"><div>uint64x2x3_t <b><b>vld1q_u64_x3</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p64_x3" type="checkbox"><label for="vld1q_p64_x3"><div>poly64x2x3_t <b><b>vld1q_p64_x3</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f64_x3" type="checkbox"><label for="vld1_f64_x3"><div>float64x1x3_t <b><b>vld1_f64_x3</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt3.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f64_x3" type="checkbox"><label for="vld1q_f64_x3"><div>float64x2x3_t <b><b>vld1q_f64_x3</b></b> (float64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt3.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s8_x4" type="checkbox"><label for="vld1_s8_x4"><div>int8x8x4_t <b><b>vld1_s8_x4</b></b> (int8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s8_x4" type="checkbox"><label for="vld1q_s8_x4"><div>int8x16x4_t <b><b>vld1q_s8_x4</b></b> (int8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s16_x4" type="checkbox"><label for="vld1_s16_x4"><div>int16x4x4_t <b><b>vld1_s16_x4</b></b> (int16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s16_x4" type="checkbox"><label for="vld1q_s16_x4"><div>int16x8x4_t <b><b>vld1q_s16_x4</b></b> (int16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s32_x4" type="checkbox"><label for="vld1_s32_x4"><div>int32x2x4_t <b><b>vld1_s32_x4</b></b> (int32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s32_x4" type="checkbox"><label for="vld1q_s32_x4"><div>int32x4x4_t <b><b>vld1q_s32_x4</b></b> (int32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u8_x4" type="checkbox"><label for="vld1_u8_x4"><div>uint8x8x4_t <b><b>vld1_u8_x4</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u8_x4" type="checkbox"><label for="vld1q_u8_x4"><div>uint8x16x4_t <b><b>vld1q_u8_x4</b></b> (uint8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u16_x4" type="checkbox"><label for="vld1_u16_x4"><div>uint16x4x4_t <b><b>vld1_u16_x4</b></b> (uint16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u16_x4" type="checkbox"><label for="vld1q_u16_x4"><div>uint16x8x4_t <b><b>vld1q_u16_x4</b></b> (uint16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u32_x4" type="checkbox"><label for="vld1_u32_x4"><div>uint32x2x4_t <b><b>vld1_u32_x4</b></b> (uint32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u32_x4" type="checkbox"><label for="vld1q_u32_x4"><div>uint32x4x4_t <b><b>vld1q_u32_x4</b></b> (uint32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f16_x4" type="checkbox"><label for="vld1_f16_x4"><div>float16x4x4_t <b><b>vld1_f16_x4</b></b> (float16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f16_x4" type="checkbox"><label for="vld1q_f16_x4"><div>float16x8x4_t <b><b>vld1q_f16_x4</b></b> (float16_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f32_x4" type="checkbox"><label for="vld1_f32_x4"><div>float32x2x4_t <b><b>vld1_f32_x4</b></b> (float32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2S - Vt4.2S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2S &rarr; result.val[3]
+Vt3.2S &rarr; result.val[2]
+Vt2.2S &rarr; result.val[1]
+Vt.2S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f32_x4" type="checkbox"><label for="vld1q_f32_x4"><div>float32x4x4_t <b><b>vld1q_f32_x4</b></b> (float32_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4S - Vt4.4S},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4S &rarr; result.val[3]
+Vt3.4S &rarr; result.val[2]
+Vt2.4S &rarr; result.val[1]
+Vt.4S &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p8_x4" type="checkbox"><label for="vld1_p8_x4"><div>poly8x8x4_t <b><b>vld1_p8_x4</b></b> (poly8_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8B - Vt4.8B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8B &rarr; result.val[3]
+Vt3.8B &rarr; result.val[2]
+Vt2.8B &rarr; result.val[1]
+Vt.8B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p8_x4" type="checkbox"><label for="vld1q_p8_x4"><div>poly8x16x4_t <b><b>vld1q_p8_x4</b></b> (poly8_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.16B - Vt4.16B},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.16B &rarr; result.val[3]
+Vt3.16B &rarr; result.val[2]
+Vt2.16B &rarr; result.val[1]
+Vt.16B &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p16_x4" type="checkbox"><label for="vld1_p16_x4"><div>poly16x4x4_t <b><b>vld1_p16_x4</b></b> (poly16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.4H - Vt4.4H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.4H &rarr; result.val[3]
+Vt3.4H &rarr; result.val[2]
+Vt2.4H &rarr; result.val[1]
+Vt.4H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p16_x4" type="checkbox"><label for="vld1q_p16_x4"><div>poly16x8x4_t <b><b>vld1q_p16_x4</b></b> (poly16_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.8H - Vt4.8H},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.8H &rarr; result.val[3]
+Vt3.8H &rarr; result.val[2]
+Vt2.8H &rarr; result.val[1]
+Vt.8H &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_s64_x4" type="checkbox"><label for="vld1_s64_x4"><div>int64x1x4_t <b><b>vld1_s64_x4</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_u64_x4" type="checkbox"><label for="vld1_u64_x4"><div>uint64x1x4_t <b><b>vld1_u64_x4</b></b> (uint64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_p64_x4" type="checkbox"><label for="vld1_p64_x4"><div>poly64x1x4_t <b><b>vld1_p64_x4</b></b> (poly64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_s64_x4" type="checkbox"><label for="vld1q_s64_x4"><div>int64x2x4_t <b><b>vld1q_s64_x4</b></b> (int64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_u64_x4" type="checkbox"><label for="vld1q_u64_x4"><div>uint64x2x4_t <b><b>vld1q_u64_x4</b></b> (uint64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_p64_x4" type="checkbox"><label for="vld1q_p64_x4"><div>poly64x2x4_t <b><b>vld1q_p64_x4</b></b> (poly64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vld1_f64_x4" type="checkbox"><label for="vld1_f64_x4"><div>float64x1x4_t <b><b>vld1_f64_x4</b></b> (float64_t const * ptr)<span class="right">Load one single-element structure to one lane of one register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&amp;FP register without affecting the other bits of the register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.1D - Vt4.1D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.1D &rarr; result.val[3]
+Vt3.1D &rarr; result.val[2]
+Vt2.1D &rarr; result.val[1]
+Vt.1D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vld1q_f64_x4" type="checkbox"><label for="vld1q_f64_x4"><div>float64x2x4_t <b><b>vld1q_f64_x4</b></b> (float64_t const * ptr)<span class="right">Load multiple single-element structures to one, two, three, or four registers</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&amp;FP registers.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ld1-single-structure-load-one-single-element-structure-to-one-lane-of-one-register">LD1</a> {Vt.2D - Vt4.2D},[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Vt4.2D &rarr; result.val[3]
+Vt3.2D &rarr; result.val[2]
+Vt2.2D &rarr; result.val[1]
+Vt.2D &rarr; result.val[0]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    SetNotTagCheckedInstruction(!wback &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        // replicate to fill 128- or 64-bit register
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Replicate.2" title="function: bits(M*N) Replicate(bits(M) x, integer N)">Replicate</a>(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        if memop == <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a> then
+            // insert into one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[rval, index, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address+offs, ebytes, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];
+    if n == 31 then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.write.0" title="accessor: SP[] = bits(width) value">SP</a>[] = address + offs;
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[n] = address + offs;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_s8" type="checkbox"><label for="vpadd_s8"><div>int8x8_t <b><b>vpadd_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_s16" type="checkbox"><label for="vpadd_s16"><div>int16x4_t <b><b>vpadd_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_s32" type="checkbox"><label for="vpadd_s32"><div>int32x2_t <b><b>vpadd_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_u8" type="checkbox"><label for="vpadd_u8"><div>uint8x8_t <b><b>vpadd_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_u16" type="checkbox"><label for="vpadd_u16"><div>uint16x4_t <b><b>vpadd_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_u32" type="checkbox"><label for="vpadd_u32"><div>uint32x2_t <b><b>vpadd_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadd_f32" type="checkbox"><label for="vpadd_f32"><div>float32x2_t <b><b>vpadd_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_s8" type="checkbox"><label for="vpaddq_s8"><div>int8x16_t <b><b>vpaddq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_s16" type="checkbox"><label for="vpaddq_s16"><div>int16x8_t <b><b>vpaddq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_s32" type="checkbox"><label for="vpaddq_s32"><div>int32x4_t <b><b>vpaddq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_s64" type="checkbox"><label for="vpaddq_s64"><div>int64x2_t <b><b>vpaddq_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_u8" type="checkbox"><label for="vpaddq_u8"><div>uint8x16_t <b><b>vpaddq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_u16" type="checkbox"><label for="vpaddq_u16"><div>uint16x8_t <b><b>vpaddq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_u32" type="checkbox"><label for="vpaddq_u32"><div>uint32x4_t <b><b>vpaddq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_u64" type="checkbox"><label for="vpaddq_u64"><div>uint64x2_t <b><b>vpaddq_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_f32" type="checkbox"><label for="vpaddq_f32"><div>float32x4_t <b><b>vpaddq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddq_f64" type="checkbox"><label for="vpaddq_f64"><div>float64x2_t <b><b>vpaddq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddl_s8" type="checkbox"><label for="vpaddl_s8"><div>int16x4_t <b><b>vpaddl_s8</b></b> (int8x8_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.4H,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddlq_s8" type="checkbox"><label for="vpaddlq_s8"><div>int16x8_t <b><b>vpaddlq_s8</b></b> (int8x16_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.8H,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddl_s16" type="checkbox"><label for="vpaddl_s16"><div>int32x2_t <b><b>vpaddl_s16</b></b> (int16x4_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.2S,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddlq_s16" type="checkbox"><label for="vpaddlq_s16"><div>int32x4_t <b><b>vpaddlq_s16</b></b> (int16x8_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.4S,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddl_s32" type="checkbox"><label for="vpaddl_s32"><div>int64x1_t <b><b>vpaddl_s32</b></b> (int32x2_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.1D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddlq_s32" type="checkbox"><label for="vpaddlq_s32"><div>int64x2_t <b><b>vpaddlq_s32</b></b> (int32x4_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.2D,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddl_u8" type="checkbox"><label for="vpaddl_u8"><div>uint16x4_t <b><b>vpaddl_u8</b></b> (uint8x8_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.4H,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddlq_u8" type="checkbox"><label for="vpaddlq_u8"><div>uint16x8_t <b><b>vpaddlq_u8</b></b> (uint8x16_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.8H,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddl_u16" type="checkbox"><label for="vpaddl_u16"><div>uint32x2_t <b><b>vpaddl_u16</b></b> (uint16x4_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.2S,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddlq_u16" type="checkbox"><label for="vpaddlq_u16"><div>uint32x4_t <b><b>vpaddlq_u16</b></b> (uint16x8_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.4S,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddl_u32" type="checkbox"><label for="vpaddl_u32"><div>uint64x1_t <b><b>vpaddl_u32</b></b> (uint32x2_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.1D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpaddlq_u32" type="checkbox"><label for="vpaddlq_u32"><div>uint64x2_t <b><b>vpaddlq_u32</b></b> (uint32x4_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.2D,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadal_s8" type="checkbox"><label for="vpadal_s8"><div>int16x4_t <b><b>vpadal_s8</b></b> (int16x4_t a, int8x8_t b)<span class="right">Signed add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register and accumulates the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sadalp-signed-add-and-accumulate-long-pairwise">SADALP</a> Vd.4H,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadalq_s8" type="checkbox"><label for="vpadalq_s8"><div>int16x8_t <b><b>vpadalq_s8</b></b> (int16x8_t a, int8x16_t b)<span class="right">Signed add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register and accumulates the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sadalp-signed-add-and-accumulate-long-pairwise">SADALP</a> Vd.8H,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadal_s16" type="checkbox"><label for="vpadal_s16"><div>int32x2_t <b><b>vpadal_s16</b></b> (int32x2_t a, int16x4_t b)<span class="right">Signed add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register and accumulates the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sadalp-signed-add-and-accumulate-long-pairwise">SADALP</a> Vd.2S,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadalq_s16" type="checkbox"><label for="vpadalq_s16"><div>int32x4_t <b><b>vpadalq_s16</b></b> (int32x4_t a, int16x8_t b)<span class="right">Signed add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register and accumulates the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sadalp-signed-add-and-accumulate-long-pairwise">SADALP</a> Vd.4S,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadal_s32" type="checkbox"><label for="vpadal_s32"><div>int64x1_t <b><b>vpadal_s32</b></b> (int64x1_t a, int32x2_t b)<span class="right">Signed add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register and accumulates the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sadalp-signed-add-and-accumulate-long-pairwise">SADALP</a> Vd.1D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D <br />
+b &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadalq_s32" type="checkbox"><label for="vpadalq_s32"><div>int64x2_t <b><b>vpadalq_s32</b></b> (int64x2_t a, int32x4_t b)<span class="right">Signed add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register and accumulates the results into the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sadalp-signed-add-and-accumulate-long-pairwise">SADALP</a> Vd.2D,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadal_u8" type="checkbox"><label for="vpadal_u8"><div>uint16x4_t <b><b>vpadal_u8</b></b> (uint16x4_t a, uint8x8_t b)<span class="right">Unsigned add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uadalp-unsigned-add-and-accumulate-long-pairwise">UADALP</a> Vd.4H,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H <br />
+b &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadalq_u8" type="checkbox"><label for="vpadalq_u8"><div>uint16x8_t <b><b>vpadalq_u8</b></b> (uint16x8_t a, uint8x16_t b)<span class="right">Unsigned add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uadalp-unsigned-add-and-accumulate-long-pairwise">UADALP</a> Vd.8H,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H <br />
+b &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadal_u16" type="checkbox"><label for="vpadal_u16"><div>uint32x2_t <b><b>vpadal_u16</b></b> (uint32x2_t a, uint16x4_t b)<span class="right">Unsigned add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uadalp-unsigned-add-and-accumulate-long-pairwise">UADALP</a> Vd.2S,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadalq_u16" type="checkbox"><label for="vpadalq_u16"><div>uint32x4_t <b><b>vpadalq_u16</b></b> (uint32x4_t a, uint16x8_t b)<span class="right">Unsigned add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uadalp-unsigned-add-and-accumulate-long-pairwise">UADALP</a> Vd.4S,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadal_u32" type="checkbox"><label for="vpadal_u32"><div>uint64x1_t <b><b>vpadal_u32</b></b> (uint64x1_t a, uint32x2_t b)<span class="right">Unsigned add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uadalp-unsigned-add-and-accumulate-long-pairwise">UADALP</a> Vd.1D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D <br />
+b &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpadalq_u32" type="checkbox"><label for="vpadalq_u32"><div>uint64x2_t <b><b>vpadalq_u32</b></b> (uint64x2_t a, uint32x4_t b)<span class="right">Unsigned add and accumulate long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register and accumulates the results with the vector elements of the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uadalp-unsigned-add-and-accumulate-long-pairwise">UADALP</a> Vd.2D,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_s8" type="checkbox"><label for="vpmax_s8"><div>int8x8_t <b><b>vpmax_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_s16" type="checkbox"><label for="vpmax_s16"><div>int16x4_t <b><b>vpmax_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_s32" type="checkbox"><label for="vpmax_s32"><div>int32x2_t <b><b>vpmax_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_u8" type="checkbox"><label for="vpmax_u8"><div>uint8x8_t <b><b>vpmax_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_u16" type="checkbox"><label for="vpmax_u16"><div>uint16x4_t <b><b>vpmax_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_u32" type="checkbox"><label for="vpmax_u32"><div>uint32x2_t <b><b>vpmax_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmax_f32" type="checkbox"><label for="vpmax_f32"><div>float32x2_t <b><b>vpmax_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_s8" type="checkbox"><label for="vpmaxq_s8"><div>int8x16_t <b><b>vpmaxq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_s16" type="checkbox"><label for="vpmaxq_s16"><div>int16x8_t <b><b>vpmaxq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_s32" type="checkbox"><label for="vpmaxq_s32"><div>int32x4_t <b><b>vpmaxq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_u8" type="checkbox"><label for="vpmaxq_u8"><div>uint8x16_t <b><b>vpmaxq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_u16" type="checkbox"><label for="vpmaxq_u16"><div>uint16x8_t <b><b>vpmaxq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_u32" type="checkbox"><label for="vpmaxq_u32"><div>uint32x4_t <b><b>vpmaxq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_f32" type="checkbox"><label for="vpmaxq_f32"><div>float32x4_t <b><b>vpmaxq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxq_f64" type="checkbox"><label for="vpmaxq_f64"><div>float64x2_t <b><b>vpmaxq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_s8" type="checkbox"><label for="vpmin_s8"><div>int8x8_t <b><b>vpmin_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_s16" type="checkbox"><label for="vpmin_s16"><div>int16x4_t <b><b>vpmin_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_s32" type="checkbox"><label for="vpmin_s32"><div>int32x2_t <b><b>vpmin_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_u8" type="checkbox"><label for="vpmin_u8"><div>uint8x8_t <b><b>vpmin_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_u16" type="checkbox"><label for="vpmin_u16"><div>uint16x4_t <b><b>vpmin_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_u32" type="checkbox"><label for="vpmin_u32"><div>uint32x2_t <b><b>vpmin_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpmin_f32" type="checkbox"><label for="vpmin_f32"><div>float32x2_t <b><b>vpmin_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_s8" type="checkbox"><label for="vpminq_s8"><div>int8x16_t <b><b>vpminq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_s16" type="checkbox"><label for="vpminq_s16"><div>int16x8_t <b><b>vpminq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_s32" type="checkbox"><label for="vpminq_s32"><div>int32x4_t <b><b>vpminq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_u8" type="checkbox"><label for="vpminq_u8"><div>uint8x16_t <b><b>vpminq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smaller of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_u16" type="checkbox"><label for="vpminq_u16"><div>uint16x8_t <b><b>vpminq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smaller of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_u32" type="checkbox"><label for="vpminq_u32"><div>uint32x4_t <b><b>vpminq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smaller of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_f32" type="checkbox"><label for="vpminq_f32"><div>float32x4_t <b><b>vpminq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminq_f64" type="checkbox"><label for="vpminq_f64"><div>float64x2_t <b><b>vpminq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxnm_f32" type="checkbox"><label for="vpmaxnm_f32"><div>float32x2_t <b><b>vpmaxnm_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxnmq_f32" type="checkbox"><label for="vpmaxnmq_f32"><div>float32x4_t <b><b>vpmaxnmq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxnmq_f64" type="checkbox"><label for="vpmaxnmq_f64"><div>float64x2_t <b><b>vpmaxnmq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminnm_f32" type="checkbox"><label for="vpminnm_f32"><div>float32x2_t <b><b>vpminnm_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminnmq_f32" type="checkbox"><label for="vpminnmq_f32"><div>float32x4_t <b><b>vpminnmq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminnmq_f64" type="checkbox"><label for="vpminnmq_f64"><div>float64x2_t <b><b>vpminnmq_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddd_s64" type="checkbox"><label for="vpaddd_s64"><div>int64_t <b><b>vpaddd_s64</b></b> (int64x2_t a)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. Note: this intrinsic uses the scalar form of the instruction (ADDP Dd,Vn.2D), which takes a single source SIMD&amp;FP register, adds its two 64-bit elements together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddd_u64" type="checkbox"><label for="vpaddd_u64"><div>uint64_t <b><b>vpaddd_u64</b></b> (uint64x2_t a)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. Note: this intrinsic uses the scalar form of the instruction (ADDP Dd,Vn.2D), which takes a single source SIMD&amp;FP register, adds its two 64-bit elements together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpadds_f32" type="checkbox"><label for="vpadds_f32"><div>float32_t <b><b>vpadds_f32</b></b> (float32x2_t a)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values. Note: this intrinsic uses the scalar form of the instruction (FADDP Sd,Vn.2S), which takes a single source SIMD&amp;FP register, adds its two 32-bit floating-point elements together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpaddd_f64" type="checkbox"><label for="vpaddd_f64"><div>float64_t <b><b>vpaddd_f64</b></b> (float64x2_t a)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxs_f32" type="checkbox"><label for="vpmaxs_f32"><div>float32_t <b><b>vpmaxs_f32</b></b> (float32x2_t a)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxqd_f64" type="checkbox"><label for="vpmaxqd_f64"><div>float64_t <b><b>vpmaxqd_f64</b></b> (float64x2_t a)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmins_f32" type="checkbox"><label for="vpmins_f32"><div>float32_t <b><b>vpmins_f32</b></b> (float32x2_t a)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminqd_f64" type="checkbox"><label for="vpminqd_f64"><div>float64_t <b><b>vpminqd_f64</b></b> (float64x2_t a)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxnms_f32" type="checkbox"><label for="vpmaxnms_f32"><div>float32_t <b><b>vpmaxnms_f32</b></b> (float32x2_t a)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpmaxnmqd_f64" type="checkbox"><label for="vpmaxnmqd_f64"><div>float64_t <b><b>vpmaxnmqd_f64</b></b> (float64x2_t a)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminnms_f32" type="checkbox"><label for="vpminnms_f32"><div>float32_t <b><b>vpminnms_f32</b></b> (float32x2_t a)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vpminnmqd_f64" type="checkbox"><label for="vpminnmqd_f64"><div>float64_t <b><b>vpminnmqd_f64</b></b> (float64x2_t a)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_s8" type="checkbox"><label for="vaddv_s8"><div>int8_t <b><b>vaddv_s8</b></b> (int8x8_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Bd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_s8" type="checkbox"><label for="vaddvq_s8"><div>int8_t <b><b>vaddvq_s8</b></b> (int8x16_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Bd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_s16" type="checkbox"><label for="vaddv_s16"><div>int16_t <b><b>vaddv_s16</b></b> (int16x4_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Hd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_s16" type="checkbox"><label for="vaddvq_s16"><div>int16_t <b><b>vaddvq_s16</b></b> (int16x8_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Hd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_s32" type="checkbox"><label for="vaddv_s32"><div>int32_t <b><b>vaddv_s32</b></b> (int32x2_t a)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a>  Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+a &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.S[0] &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_s32" type="checkbox"><label for="vaddvq_s32"><div>int32_t <b><b>vaddvq_s32</b></b> (int32x4_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_s64" type="checkbox"><label for="vaddvq_s64"><div>int64_t <b><b>vaddvq_s64</b></b> (int64x2_t a)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_u8" type="checkbox"><label for="vaddv_u8"><div>uint8_t <b><b>vaddv_u8</b></b> (uint8x8_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Bd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_u8" type="checkbox"><label for="vaddvq_u8"><div>uint8_t <b><b>vaddvq_u8</b></b> (uint8x16_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Bd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_u16" type="checkbox"><label for="vaddv_u16"><div>uint16_t <b><b>vaddv_u16</b></b> (uint16x4_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Hd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_u16" type="checkbox"><label for="vaddvq_u16"><div>uint16_t <b><b>vaddvq_u16</b></b> (uint16x8_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Hd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_u32" type="checkbox"><label for="vaddv_u32"><div>uint32_t <b><b>vaddv_u32</b></b> (uint32x2_t a)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a>  Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+a &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.S[0] &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_u32" type="checkbox"><label for="vaddvq_u32"><div>uint32_t <b><b>vaddvq_u32</b></b> (uint32x4_t a)<span class="right">Add across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addv-add-across-vector">ADDV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_ADD" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_ADD</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_u64" type="checkbox"><label for="vaddvq_u64"><div>uint64_t <b><b>vaddvq_u64</b></b> (uint64x2_t a)<span class="right">Add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/addp-vector-add-pairwise-vector">ADDP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element1 + element2;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddv_f32" type="checkbox"><label for="vaddv_f32"><div>float32_t <b><b>vaddv_f32</b></b> (float32x2_t a)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_f32" type="checkbox"><label for="vaddvq_f32"><div>float32_t <b><b>vaddvq_f32</b></b> (float32x4_t a)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Vt.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Sd,Vt.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+a &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddvq_f64" type="checkbox"><label for="vaddvq_f64"><div>float64_t <b><b>vaddvq_f64</b></b> (float64x2_t a)<span class="right">Floating-point add pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/faddp-vector-floating-point-add-pairwise-vector">FADDP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPAdd.3" title="function: bits(N) FPAdd(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPAdd</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlv_s8" type="checkbox"><label for="vaddlv_s8"><div>int16_t <b><b>vaddlv_s8</b></b> (int8x8_t a)<span class="right">Signed add long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlv-signed-add-long-across-vector">SADDLV</a> Hd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlvq_s8" type="checkbox"><label for="vaddlvq_s8"><div>int16_t <b><b>vaddlvq_s8</b></b> (int8x16_t a)<span class="right">Signed add long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlv-signed-add-long-across-vector">SADDLV</a> Hd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlv_s16" type="checkbox"><label for="vaddlv_s16"><div>int32_t <b><b>vaddlv_s16</b></b> (int16x4_t a)<span class="right">Signed add long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlv-signed-add-long-across-vector">SADDLV</a> Sd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlvq_s16" type="checkbox"><label for="vaddlvq_s16"><div>int32_t <b><b>vaddlvq_s16</b></b> (int16x8_t a)<span class="right">Signed add long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlv-signed-add-long-across-vector">SADDLV</a> Sd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlv_s32" type="checkbox"><label for="vaddlv_s32"><div>int64_t <b><b>vaddlv_s32</b></b> (int32x2_t a)<span class="right">Signed add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlp-signed-add-long-pairwise">SADDLP</a> Vd.1D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlvq_s32" type="checkbox"><label for="vaddlvq_s32"><div>int64_t <b><b>vaddlvq_s32</b></b> (int32x4_t a)<span class="right">Signed add long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/saddlv-signed-add-long-across-vector">SADDLV</a> Dd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlv_u8" type="checkbox"><label for="vaddlv_u8"><div>uint16_t <b><b>vaddlv_u8</b></b> (uint8x8_t a)<span class="right">Unsigned sum long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlv-unsigned-sum-long-across-vector">UADDLV</a> Hd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlvq_u8" type="checkbox"><label for="vaddlvq_u8"><div>uint16_t <b><b>vaddlvq_u8</b></b> (uint8x16_t a)<span class="right">Unsigned sum long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlv-unsigned-sum-long-across-vector">UADDLV</a> Hd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlv_u16" type="checkbox"><label for="vaddlv_u16"><div>uint32_t <b><b>vaddlv_u16</b></b> (uint16x4_t a)<span class="right">Unsigned sum long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlv-unsigned-sum-long-across-vector">UADDLV</a> Sd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlvq_u16" type="checkbox"><label for="vaddlvq_u16"><div>uint32_t <b><b>vaddlvq_u16</b></b> (uint16x8_t a)<span class="right">Unsigned sum long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlv-unsigned-sum-long-across-vector">UADDLV</a> Sd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlv_u32" type="checkbox"><label for="vaddlv_u32"><div>uint64_t <b><b>vaddlv_u32</b></b> (uint32x2_t a)<span class="right">Unsigned add long pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&amp;FP register, places the result into a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the source vector elements.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlp-unsigned-add-long-pairwise">UADDLP</a> Vd.1D,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d] else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+for e = 0 to elements-1
+    op1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+0, esize], unsigned);
+    op2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)&lt;2*esize-1:0&gt;;
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[result, e, 2*esize] + sum;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vaddlvq_u32" type="checkbox"><label for="vaddlvq_u32"><div>uint64_t <b><b>vaddlvq_u32</b></b> (uint32x4_t a)<span class="right">Unsigned sum long across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&amp;FP register together, and writes the scalar result to the destination SIMD&amp;FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uaddlv-unsigned-sum-long-across-vector">UADDLV</a> Dd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer sum;
+
+sum = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = sum&lt;2*esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_s8" type="checkbox"><label for="vmaxv_s8"><div>int8_t <b><b>vmaxv_s8</b></b> (int8x8_t a)<span class="right">Signed maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxv-signed-maximum-across-vector">SMAXV</a> Bd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_s8" type="checkbox"><label for="vmaxvq_s8"><div>int8_t <b><b>vmaxvq_s8</b></b> (int8x16_t a)<span class="right">Signed maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxv-signed-maximum-across-vector">SMAXV</a> Bd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_s16" type="checkbox"><label for="vmaxv_s16"><div>int16_t <b><b>vmaxv_s16</b></b> (int16x4_t a)<span class="right">Signed maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxv-signed-maximum-across-vector">SMAXV</a> Hd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_s16" type="checkbox"><label for="vmaxvq_s16"><div>int16_t <b><b>vmaxvq_s16</b></b> (int16x8_t a)<span class="right">Signed maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxv-signed-maximum-across-vector">SMAXV</a> Hd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_s32" type="checkbox"><label for="vmaxv_s32"><div>int32_t <b><b>vmaxv_s32</b></b> (int32x2_t a)<span class="right">Signed maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxp-signed-maximum-pairwise">SMAXP</a>  Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+a &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.S[0] &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_s32" type="checkbox"><label for="vmaxvq_s32"><div>int32_t <b><b>vmaxvq_s32</b></b> (int32x4_t a)<span class="right">Signed maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smaxv-signed-maximum-across-vector">SMAXV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_u8" type="checkbox"><label for="vmaxv_u8"><div>uint8_t <b><b>vmaxv_u8</b></b> (uint8x8_t a)<span class="right">Unsigned maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxv-unsigned-maximum-across-vector">UMAXV</a> Bd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_u8" type="checkbox"><label for="vmaxvq_u8"><div>uint8_t <b><b>vmaxvq_u8</b></b> (uint8x16_t a)<span class="right">Unsigned maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxv-unsigned-maximum-across-vector">UMAXV</a> Bd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_u16" type="checkbox"><label for="vmaxv_u16"><div>uint16_t <b><b>vmaxv_u16</b></b> (uint16x4_t a)<span class="right">Unsigned maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxv-unsigned-maximum-across-vector">UMAXV</a> Hd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_u16" type="checkbox"><label for="vmaxvq_u16"><div>uint16_t <b><b>vmaxvq_u16</b></b> (uint16x8_t a)<span class="right">Unsigned maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxv-unsigned-maximum-across-vector">UMAXV</a> Hd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_u32" type="checkbox"><label for="vmaxv_u32"><div>uint32_t <b><b>vmaxv_u32</b></b> (uint32x2_t a)<span class="right">Unsigned maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxp-unsigned-maximum-pairwise">UMAXP</a>  Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+a &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.S[0] &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_u32" type="checkbox"><label for="vmaxvq_u32"><div>uint32_t <b><b>vmaxvq_u32</b></b> (uint32x4_t a)<span class="right">Unsigned maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umaxv-unsigned-maximum-across-vector">UMAXV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxv_f32" type="checkbox"><label for="vmaxv_f32"><div>float32_t <b><b>vmaxv_f32</b></b> (float32x2_t a)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum of Pair of elements (scalar). This instruction compares the two vector elements in the source SIMD&amp;FP register, and writes the larger of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_f32" type="checkbox"><label for="vmaxvq_f32"><div>float32_t <b><b>vmaxvq_f32</b></b> (float32x4_t a)<span class="right">Floating-point maximum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxv-floating-point-maximum-across-vector">FMAXV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_FMAX" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_FMAX</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxvq_f64" type="checkbox"><label for="vmaxvq_f64"><div>float64_t <b><b>vmaxvq_f64</b></b> (float64x2_t a)<span class="right">Floating-point maximum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum of Pair of elements (scalar). This instruction compares the two vector elements in the source SIMD&amp;FP register, and writes the larger of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxp-vector-floating-point-maximum-pairwise-vector">FMAXP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_s8" type="checkbox"><label for="vminv_s8"><div>int8_t <b><b>vminv_s8</b></b> (int8x8_t a)<span class="right">Signed minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminv-signed-minimum-across-vector">SMINV</a> Bd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_s8" type="checkbox"><label for="vminvq_s8"><div>int8_t <b><b>vminvq_s8</b></b> (int8x16_t a)<span class="right">Signed minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminv-signed-minimum-across-vector">SMINV</a> Bd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_s16" type="checkbox"><label for="vminv_s16"><div>int16_t <b><b>vminv_s16</b></b> (int16x4_t a)<span class="right">Signed minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminv-signed-minimum-across-vector">SMINV</a> Hd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_s16" type="checkbox"><label for="vminvq_s16"><div>int16_t <b><b>vminvq_s16</b></b> (int16x8_t a)<span class="right">Signed minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminv-signed-minimum-across-vector">SMINV</a> Hd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_s32" type="checkbox"><label for="vminv_s32"><div>int32_t <b><b>vminv_s32</b></b> (int32x2_t a)<span class="right">Signed minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminp-signed-minimum-pairwise">SMINP</a>  Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+a &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.S[0] &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_s32" type="checkbox"><label for="vminvq_s32"><div>int32_t <b><b>vminvq_s32</b></b> (int32x4_t a)<span class="right">Signed minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are signed integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sminv-signed-minimum-across-vector">SMINV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_u8" type="checkbox"><label for="vminv_u8"><div>uint8_t <b><b>vminv_u8</b></b> (uint8x8_t a)<span class="right">Unsigned minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminv-unsigned-minimum-across-vector">UMINV</a> Bd,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_u8" type="checkbox"><label for="vminvq_u8"><div>uint8_t <b><b>vminvq_u8</b></b> (uint8x16_t a)<span class="right">Unsigned minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminv-unsigned-minimum-across-vector">UMINV</a> Bd,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Bd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_u16" type="checkbox"><label for="vminv_u16"><div>uint16_t <b><b>vminv_u16</b></b> (uint16x4_t a)<span class="right">Unsigned minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminv-unsigned-minimum-across-vector">UMINV</a> Hd,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_u16" type="checkbox"><label for="vminvq_u16"><div>uint16_t <b><b>vminvq_u16</b></b> (uint16x8_t a)<span class="right">Unsigned minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminv-unsigned-minimum-across-vector">UMINV</a> Hd,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_u32" type="checkbox"><label for="vminv_u32"><div>uint32_t <b><b>vminv_u32</b></b> (uint32x2_t a)<span class="right">Unsigned minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminp-unsigned-minimum-pairwise">UMINP</a>  Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+a &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.S[0] &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize], unsigned);
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(element1, element2) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(element1, element2);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = maxmin&lt;esize-1:0&gt;;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_u32" type="checkbox"><label for="vminvq_u32"><div>uint32_t <b><b>vminvq_u32</b></b> (uint32x4_t a)<span class="right">Unsigned minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are unsigned integer values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uminv-unsigned-minimum-across-vector">UMINV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+integer maxmin;
+integer element;
+
+maxmin = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Int.2" title="function: integer Int(bits(N) x, boolean unsigned)">Int</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize], unsigned);
+    maxmin = if min then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Min.2" title="function: integer Min(integer a, integer b)">Min</a>(maxmin, element) else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Max.2" title="function: integer Max(integer a, integer b)">Max</a>(maxmin, element);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = maxmin&lt;esize-1:0&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminv_f32" type="checkbox"><label for="vminv_f32"><div>float32_t <b><b>vminv_f32</b></b> (float32x2_t a)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_f32" type="checkbox"><label for="vminvq_f32"><div>float32_t <b><b>vminvq_f32</b></b> (float32x4_t a)<span class="right">Floating-point minimum across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminv-floating-point-minimum-across-vector">FMINV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_FMIN" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_FMIN</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminvq_f64" type="checkbox"><label for="vminvq_f64"><div>float64_t <b><b>vminvq_f64</b></b> (float64x2_t a)<span class="right">Floating-point minimum pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminp-vector-floating-point-minimum-pairwise-vector">FMINP</a> Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMin.3" title="function: bits(N) FPMin(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMin</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMax.3" title="function: bits(N) FPMax(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMax</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnmv_f32" type="checkbox"><label for="vmaxnmv_f32"><div>float32_t <b><b>vmaxnmv_f32</b></b> (float32x2_t a)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a> Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnmvq_f32" type="checkbox"><label for="vmaxnmvq_f32"><div>float32_t <b><b>vmaxnmvq_f32</b></b> (float32x4_t a)<span class="right">Floating-point maximum number across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the largest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmv-floating-point-maximum-number-across-vector">FMAXNMV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_FMAXNUM" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_FMAXNUM</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vmaxnmvq_f64" type="checkbox"><label for="vmaxnmvq_f64"><div>float64_t <b><b>vmaxnmvq_f64</b></b> (float64x2_t a)<span class="right">Floating-point maximum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmaxnmp-vector-floating-point-maximum-number-pairwise-vector">FMAXNMP</a>  Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminnmv_f32" type="checkbox"><label for="vminnmv_f32"><div>float32_t <b><b>vminnmv_f32</b></b> (float32x2_t a)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a>  Sd,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminnmvq_f32" type="checkbox"><label for="vminnmvq_f32"><div>float32_t <b><b>vminnmvq_f32</b></b> (float32x4_t a)<span class="right">Floating-point minimum number across vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number across Vector. This instruction compares all the vector elements in the source SIMD&amp;FP register, and writes the smallest of the values as a scalar to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmv-floating-point-minimum-number-across-vector">FMINNMV</a> Sd,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.Reduce.3" title="function: bits(esize) Reduce(ReduceOp op, bits(N) input, integer esize)">Reduce</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#ReduceOp_FMINNUM" title="enumeration ReduceOp {ReduceOp_FMINNUM, ReduceOp_FMAXNUM,
+ ReduceOp_FMIN, ReduceOp_FMAX,
+ ReduceOp_FADD, ReduceOp_ADD}">ReduceOp_FMINNUM</a>, operand, esize);</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vminnmvq_f64" type="checkbox"><label for="vminnmvq_f64"><div>float64_t <b><b>vminnmvq_f64</b></b> (float64x2_t a)<span class="right">Floating-point minimum number pairwise</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&amp;FP register after the vector elements of the second source SIMD&amp;FP register, reads each pair of adjacent vector elements in the two source SIMD&amp;FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&amp;FP register. All the values in this instruction are floating-point values.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fminnmp-vector-floating-point-minimum-number-pairwise-vector">FMINNMP</a>  Dd,Vn.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, 2*e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[concat, (2*e)+1, esize];
+    else
+        element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+        element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+
+    if minimum then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMinNum.3" title="function: bits(N) FPMinNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMinNum</a>(element1, element2, FPCR);
+    else
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMaxNum.3" title="function: bits(N) FPMaxNum(bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMaxNum</a>(element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vext_s8" type="checkbox"><label for="vext_s8"><div>int8x8_t <b><b>vext_s8</b></b> (int8x8_t a, int8x8_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_s8" type="checkbox"><label for="vextq_s8"><div>int8x16_t <b><b>vextq_s8</b></b> (int8x16_t a, int8x16_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_s16" type="checkbox"><label for="vext_s16"><div>int16x4_t <b><b>vext_s16</b></b> (int16x4_t a, int16x4_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;1)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_s16" type="checkbox"><label for="vextq_s16"><div>int16x8_t <b><b>vextq_s16</b></b> (int16x8_t a, int16x8_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;1)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_s32" type="checkbox"><label for="vext_s32"><div>int32x2_t <b><b>vext_s32</b></b> (int32x2_t a, int32x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;2)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_s32" type="checkbox"><label for="vextq_s32"><div>int32x4_t <b><b>vextq_s32</b></b> (int32x4_t a, int32x4_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;2)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_s64" type="checkbox"><label for="vext_s64"><div>int64x1_t <b><b>vext_s64</b></b> (int64x1_t a, int64x1_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_s64" type="checkbox"><label for="vextq_s64"><div>int64x2_t <b><b>vextq_s64</b></b> (int64x2_t a, int64x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_u8" type="checkbox"><label for="vext_u8"><div>uint8x8_t <b><b>vext_u8</b></b> (uint8x8_t a, uint8x8_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_u8" type="checkbox"><label for="vextq_u8"><div>uint8x16_t <b><b>vextq_u8</b></b> (uint8x16_t a, uint8x16_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_u16" type="checkbox"><label for="vext_u16"><div>uint16x4_t <b><b>vext_u16</b></b> (uint16x4_t a, uint16x4_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;1)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_u16" type="checkbox"><label for="vextq_u16"><div>uint16x8_t <b><b>vextq_u16</b></b> (uint16x8_t a, uint16x8_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;1)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_u32" type="checkbox"><label for="vext_u32"><div>uint32x2_t <b><b>vext_u32</b></b> (uint32x2_t a, uint32x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;2)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_u32" type="checkbox"><label for="vextq_u32"><div>uint32x4_t <b><b>vextq_u32</b></b> (uint32x4_t a, uint32x4_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;2)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_u64" type="checkbox"><label for="vext_u64"><div>uint64x1_t <b><b>vext_u64</b></b> (uint64x1_t a, uint64x1_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_u64" type="checkbox"><label for="vextq_u64"><div>uint64x2_t <b><b>vextq_u64</b></b> (uint64x2_t a, uint64x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_p64" type="checkbox"><label for="vext_p64"><div>poly64x1_t <b><b>vext_p64</b></b> (poly64x1_t a, poly64x1_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_p64" type="checkbox"><label for="vextq_p64"><div>poly64x2_t <b><b>vextq_p64</b></b> (poly64x2_t a, poly64x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_f32" type="checkbox"><label for="vext_f32"><div>float32x2_t <b><b>vext_f32</b></b> (float32x2_t a, float32x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;2)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_f32" type="checkbox"><label for="vextq_f32"><div>float32x4_t <b><b>vextq_f32</b></b> (float32x4_t a, float32x4_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;2)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_f64" type="checkbox"><label for="vext_f64"><div>float64x1_t <b><b>vext_f64</b></b> (float64x1_t a, float64x1_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vextq_f64" type="checkbox"><label for="vextq_f64"><div>float64x2_t <b><b>vextq_f64</b></b> (float64x2_t a, float64x2_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;3)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vext_p8" type="checkbox"><label for="vext_p8"><div>poly8x8_t <b><b>vext_p8</b></b> (poly8x8_t a, poly8x8_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_p8" type="checkbox"><label for="vextq_p8"><div>poly8x16_t <b><b>vextq_p8</b></b> (poly8x16_t a, poly8x16_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#n
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vext_p16" type="checkbox"><label for="vext_p16"><div>poly16x4_t <b><b>vext_p16</b></b> (poly16x4_t a, poly16x4_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.8B,Vn.8B,Vm.8B,#(n&lt;&lt;1)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B <br />
+0 &lt;= n &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vextq_p16" type="checkbox"><label for="vextq_p16"><div>poly16x8_t <b><b>vextq_p16</b></b> (poly16x8_t a, poly16x8_t b, const int n)<span class="right">Extract vector from pair of vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&amp;FP register and the highest vector elements from the first source SIMD&amp;FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&amp;FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ext-extract-vector-from-pair-of-vectors">EXT</a> Vd.16B,Vn.16B,Vm.16B,#(n&lt;&lt;1)
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B <br />
+0 &lt;= n &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) hi = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) lo = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize*2) concat = hi:lo;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = concat&lt;position+datasize-1:position&gt;;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_s8" type="checkbox"><label for="vrev64_s8"><div>int8x8_t <b><b>vrev64_s8</b></b> (int8x8_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_s8" type="checkbox"><label for="vrev64q_s8"><div>int8x16_t <b><b>vrev64q_s8</b></b> (int8x16_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_s16" type="checkbox"><label for="vrev64_s16"><div>int16x4_t <b><b>vrev64_s16</b></b> (int16x4_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_s16" type="checkbox"><label for="vrev64q_s16"><div>int16x8_t <b><b>vrev64q_s16</b></b> (int16x8_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_s32" type="checkbox"><label for="vrev64_s32"><div>int32x2_t <b><b>vrev64_s32</b></b> (int32x2_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_s32" type="checkbox"><label for="vrev64q_s32"><div>int32x4_t <b><b>vrev64q_s32</b></b> (int32x4_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_u8" type="checkbox"><label for="vrev64_u8"><div>uint8x8_t <b><b>vrev64_u8</b></b> (uint8x8_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_u8" type="checkbox"><label for="vrev64q_u8"><div>uint8x16_t <b><b>vrev64q_u8</b></b> (uint8x16_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_u16" type="checkbox"><label for="vrev64_u16"><div>uint16x4_t <b><b>vrev64_u16</b></b> (uint16x4_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_u16" type="checkbox"><label for="vrev64q_u16"><div>uint16x8_t <b><b>vrev64q_u16</b></b> (uint16x8_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_u32" type="checkbox"><label for="vrev64_u32"><div>uint32x2_t <b><b>vrev64_u32</b></b> (uint32x2_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_u32" type="checkbox"><label for="vrev64q_u32"><div>uint32x4_t <b><b>vrev64q_u32</b></b> (uint32x4_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_f32" type="checkbox"><label for="vrev64_f32"><div>float32x2_t <b><b>vrev64_f32</b></b> (float32x2_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.2S,Vn.2S
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_f32" type="checkbox"><label for="vrev64q_f32"><div>float32x4_t <b><b>vrev64q_f32</b></b> (float32x4_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_p8" type="checkbox"><label for="vrev64_p8"><div>poly8x8_t <b><b>vrev64_p8</b></b> (poly8x8_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_p8" type="checkbox"><label for="vrev64q_p8"><div>poly8x16_t <b><b>vrev64q_p8</b></b> (poly8x16_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64_p16" type="checkbox"><label for="vrev64_p16"><div>poly16x4_t <b><b>vrev64_p16</b></b> (poly16x4_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev64q_p16" type="checkbox"><label for="vrev64q_p16"><div>poly16x8_t <b><b>vrev64q_p16</b></b> (poly16x8_t vec)<span class="right">Reverse elements in 64-bit doublewords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev64-reverse-elements-in-64-bit-doublewords-vector">REV64</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32_s8" type="checkbox"><label for="vrev32_s8"><div>int8x8_t <b><b>vrev32_s8</b></b> (int8x8_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32q_s8" type="checkbox"><label for="vrev32q_s8"><div>int8x16_t <b><b>vrev32q_s8</b></b> (int8x16_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32_s16" type="checkbox"><label for="vrev32_s16"><div>int16x4_t <b><b>vrev32_s16</b></b> (int16x4_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32q_s16" type="checkbox"><label for="vrev32q_s16"><div>int16x8_t <b><b>vrev32q_s16</b></b> (int16x8_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32_u8" type="checkbox"><label for="vrev32_u8"><div>uint8x8_t <b><b>vrev32_u8</b></b> (uint8x8_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32q_u8" type="checkbox"><label for="vrev32q_u8"><div>uint8x16_t <b><b>vrev32q_u8</b></b> (uint8x16_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32_u16" type="checkbox"><label for="vrev32_u16"><div>uint16x4_t <b><b>vrev32_u16</b></b> (uint16x4_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32q_u16" type="checkbox"><label for="vrev32q_u16"><div>uint16x8_t <b><b>vrev32q_u16</b></b> (uint16x8_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32_p8" type="checkbox"><label for="vrev32_p8"><div>poly8x8_t <b><b>vrev32_p8</b></b> (poly8x8_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32q_p8" type="checkbox"><label for="vrev32q_p8"><div>poly8x16_t <b><b>vrev32q_p8</b></b> (poly8x16_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32_p16" type="checkbox"><label for="vrev32_p16"><div>poly16x4_t <b><b>vrev32_p16</b></b> (poly16x4_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.4H,Vn.4H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev32q_p16" type="checkbox"><label for="vrev32q_p16"><div>poly16x8_t <b><b>vrev32q_p16</b></b> (poly16x8_t vec)<span class="right">Reverse elements in 32-bit words</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev32-vector-reverse-elements-in-32-bit-words-vector">REV32</a> Vd.8H,Vn.8H
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev16_s8" type="checkbox"><label for="vrev16_s8"><div>int8x8_t <b><b>vrev16_s8</b></b> (int8x8_t vec)<span class="right">Reverse elements in 16-bit halfwords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev16-vector-reverse-elements-in-16-bit-halfwords-vector">REV16</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev16q_s8" type="checkbox"><label for="vrev16q_s8"><div>int8x16_t <b><b>vrev16q_s8</b></b> (int8x16_t vec)<span class="right">Reverse elements in 16-bit halfwords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev16-vector-reverse-elements-in-16-bit-halfwords-vector">REV16</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev16_u8" type="checkbox"><label for="vrev16_u8"><div>uint8x8_t <b><b>vrev16_u8</b></b> (uint8x8_t vec)<span class="right">Reverse elements in 16-bit halfwords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev16-vector-reverse-elements-in-16-bit-halfwords-vector">REV16</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev16q_u8" type="checkbox"><label for="vrev16q_u8"><div>uint8x16_t <b><b>vrev16q_u8</b></b> (uint8x16_t vec)<span class="right">Reverse elements in 16-bit halfwords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev16-vector-reverse-elements-in-16-bit-halfwords-vector">REV16</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev16_p8" type="checkbox"><label for="vrev16_p8"><div>poly8x8_t <b><b>vrev16_p8</b></b> (poly8x8_t vec)<span class="right">Reverse elements in 16-bit halfwords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev16-vector-reverse-elements-in-16-bit-halfwords-vector">REV16</a> Vd.8B,Vn.8B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vrev16q_p8" type="checkbox"><label for="vrev16q_p8"><div>poly8x16_t <b><b>vrev16q_p8</b></b> (poly8x16_t vec)<span class="right">Reverse elements in 16-bit halfwords</span></div></label><article>      <h4>Description</h4><p><p class="aml">Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&amp;FP register, places the results into a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/rev16-vector-reverse-elements-in-16-bit-halfwords-vector">REV16</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>vec &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, rev_element, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_s8" type="checkbox"><label for="vzip1_s8"><div>int8x8_t <b><b>vzip1_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_s8" type="checkbox"><label for="vzip1q_s8"><div>int8x16_t <b><b>vzip1q_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_s16" type="checkbox"><label for="vzip1_s16"><div>int16x4_t <b><b>vzip1_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_s16" type="checkbox"><label for="vzip1q_s16"><div>int16x8_t <b><b>vzip1q_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_s32" type="checkbox"><label for="vzip1_s32"><div>int32x2_t <b><b>vzip1_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_s32" type="checkbox"><label for="vzip1q_s32"><div>int32x4_t <b><b>vzip1q_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_s64" type="checkbox"><label for="vzip1q_s64"><div>int64x2_t <b><b>vzip1q_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_u8" type="checkbox"><label for="vzip1_u8"><div>uint8x8_t <b><b>vzip1_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_u8" type="checkbox"><label for="vzip1q_u8"><div>uint8x16_t <b><b>vzip1q_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_u16" type="checkbox"><label for="vzip1_u16"><div>uint16x4_t <b><b>vzip1_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_u16" type="checkbox"><label for="vzip1q_u16"><div>uint16x8_t <b><b>vzip1q_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_u32" type="checkbox"><label for="vzip1_u32"><div>uint32x2_t <b><b>vzip1_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_u32" type="checkbox"><label for="vzip1q_u32"><div>uint32x4_t <b><b>vzip1q_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_u64" type="checkbox"><label for="vzip1q_u64"><div>uint64x2_t <b><b>vzip1q_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_p64" type="checkbox"><label for="vzip1q_p64"><div>poly64x2_t <b><b>vzip1q_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_f32" type="checkbox"><label for="vzip1_f32"><div>float32x2_t <b><b>vzip1_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_f32" type="checkbox"><label for="vzip1q_f32"><div>float32x4_t <b><b>vzip1q_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_f64" type="checkbox"><label for="vzip1q_f64"><div>float64x2_t <b><b>vzip1q_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_p8" type="checkbox"><label for="vzip1_p8"><div>poly8x8_t <b><b>vzip1_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_p8" type="checkbox"><label for="vzip1q_p8"><div>poly8x16_t <b><b>vzip1q_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1_p16" type="checkbox"><label for="vzip1_p16"><div>poly16x4_t <b><b>vzip1_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip1q_p16" type="checkbox"><label for="vzip1q_p16"><div>poly16x8_t <b><b>vzip1q_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_s8" type="checkbox"><label for="vzip2_s8"><div>int8x8_t <b><b>vzip2_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_s8" type="checkbox"><label for="vzip2q_s8"><div>int8x16_t <b><b>vzip2q_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_s16" type="checkbox"><label for="vzip2_s16"><div>int16x4_t <b><b>vzip2_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_s16" type="checkbox"><label for="vzip2q_s16"><div>int16x8_t <b><b>vzip2q_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_s32" type="checkbox"><label for="vzip2_s32"><div>int32x2_t <b><b>vzip2_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_s32" type="checkbox"><label for="vzip2q_s32"><div>int32x4_t <b><b>vzip2q_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_s64" type="checkbox"><label for="vzip2q_s64"><div>int64x2_t <b><b>vzip2q_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_u8" type="checkbox"><label for="vzip2_u8"><div>uint8x8_t <b><b>vzip2_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_u8" type="checkbox"><label for="vzip2q_u8"><div>uint8x16_t <b><b>vzip2q_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_u16" type="checkbox"><label for="vzip2_u16"><div>uint16x4_t <b><b>vzip2_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_u16" type="checkbox"><label for="vzip2q_u16"><div>uint16x8_t <b><b>vzip2q_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_u32" type="checkbox"><label for="vzip2_u32"><div>uint32x2_t <b><b>vzip2_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_u32" type="checkbox"><label for="vzip2q_u32"><div>uint32x4_t <b><b>vzip2q_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_u64" type="checkbox"><label for="vzip2q_u64"><div>uint64x2_t <b><b>vzip2q_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_p64" type="checkbox"><label for="vzip2q_p64"><div>poly64x2_t <b><b>vzip2q_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_f32" type="checkbox"><label for="vzip2_f32"><div>float32x2_t <b><b>vzip2_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_f32" type="checkbox"><label for="vzip2q_f32"><div>float32x4_t <b><b>vzip2q_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_f64" type="checkbox"><label for="vzip2q_f64"><div>float64x2_t <b><b>vzip2q_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_p8" type="checkbox"><label for="vzip2_p8"><div>poly8x8_t <b><b>vzip2_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_p8" type="checkbox"><label for="vzip2q_p8"><div>poly8x16_t <b><b>vzip2q_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2_p16" type="checkbox"><label for="vzip2_p16"><div>poly16x4_t <b><b>vzip2_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vzip2q_p16" type="checkbox"><label for="vzip2q_p16"><div>poly16x8_t <b><b>vzip2q_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_s8" type="checkbox"><label for="vuzp1_s8"><div>int8x8_t <b><b>vuzp1_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_s8" type="checkbox"><label for="vuzp1q_s8"><div>int8x16_t <b><b>vuzp1q_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_s16" type="checkbox"><label for="vuzp1_s16"><div>int16x4_t <b><b>vuzp1_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_s16" type="checkbox"><label for="vuzp1q_s16"><div>int16x8_t <b><b>vuzp1q_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_s32" type="checkbox"><label for="vuzp1_s32"><div>int32x2_t <b><b>vuzp1_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_s32" type="checkbox"><label for="vuzp1q_s32"><div>int32x4_t <b><b>vuzp1q_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_s64" type="checkbox"><label for="vuzp1q_s64"><div>int64x2_t <b><b>vuzp1q_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_u8" type="checkbox"><label for="vuzp1_u8"><div>uint8x8_t <b><b>vuzp1_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_u8" type="checkbox"><label for="vuzp1q_u8"><div>uint8x16_t <b><b>vuzp1q_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_u16" type="checkbox"><label for="vuzp1_u16"><div>uint16x4_t <b><b>vuzp1_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_u16" type="checkbox"><label for="vuzp1q_u16"><div>uint16x8_t <b><b>vuzp1q_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_u32" type="checkbox"><label for="vuzp1_u32"><div>uint32x2_t <b><b>vuzp1_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_u32" type="checkbox"><label for="vuzp1q_u32"><div>uint32x4_t <b><b>vuzp1q_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_u64" type="checkbox"><label for="vuzp1q_u64"><div>uint64x2_t <b><b>vuzp1q_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_p64" type="checkbox"><label for="vuzp1q_p64"><div>poly64x2_t <b><b>vuzp1q_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_f32" type="checkbox"><label for="vuzp1_f32"><div>float32x2_t <b><b>vuzp1_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_f32" type="checkbox"><label for="vuzp1q_f32"><div>float32x4_t <b><b>vuzp1q_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_f64" type="checkbox"><label for="vuzp1q_f64"><div>float64x2_t <b><b>vuzp1q_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_p8" type="checkbox"><label for="vuzp1_p8"><div>poly8x8_t <b><b>vuzp1_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_p8" type="checkbox"><label for="vuzp1q_p8"><div>poly8x16_t <b><b>vuzp1q_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1_p16" type="checkbox"><label for="vuzp1_p16"><div>poly16x4_t <b><b>vuzp1_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp1q_p16" type="checkbox"><label for="vuzp1q_p16"><div>poly16x8_t <b><b>vuzp1q_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_s8" type="checkbox"><label for="vuzp2_s8"><div>int8x8_t <b><b>vuzp2_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_s8" type="checkbox"><label for="vuzp2q_s8"><div>int8x16_t <b><b>vuzp2q_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_s16" type="checkbox"><label for="vuzp2_s16"><div>int16x4_t <b><b>vuzp2_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_s16" type="checkbox"><label for="vuzp2q_s16"><div>int16x8_t <b><b>vuzp2q_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_s32" type="checkbox"><label for="vuzp2_s32"><div>int32x2_t <b><b>vuzp2_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_s32" type="checkbox"><label for="vuzp2q_s32"><div>int32x4_t <b><b>vuzp2q_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_s64" type="checkbox"><label for="vuzp2q_s64"><div>int64x2_t <b><b>vuzp2q_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_u8" type="checkbox"><label for="vuzp2_u8"><div>uint8x8_t <b><b>vuzp2_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_u8" type="checkbox"><label for="vuzp2q_u8"><div>uint8x16_t <b><b>vuzp2q_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_u16" type="checkbox"><label for="vuzp2_u16"><div>uint16x4_t <b><b>vuzp2_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_u16" type="checkbox"><label for="vuzp2q_u16"><div>uint16x8_t <b><b>vuzp2q_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_u32" type="checkbox"><label for="vuzp2_u32"><div>uint32x2_t <b><b>vuzp2_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_u32" type="checkbox"><label for="vuzp2q_u32"><div>uint32x4_t <b><b>vuzp2q_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_u64" type="checkbox"><label for="vuzp2q_u64"><div>uint64x2_t <b><b>vuzp2q_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_p64" type="checkbox"><label for="vuzp2q_p64"><div>poly64x2_t <b><b>vuzp2q_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_f32" type="checkbox"><label for="vuzp2_f32"><div>float32x2_t <b><b>vuzp2_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_f32" type="checkbox"><label for="vuzp2q_f32"><div>float32x4_t <b><b>vuzp2q_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_f64" type="checkbox"><label for="vuzp2q_f64"><div>float64x2_t <b><b>vuzp2q_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_p8" type="checkbox"><label for="vuzp2_p8"><div>poly8x8_t <b><b>vuzp2_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_p8" type="checkbox"><label for="vuzp2q_p8"><div>poly8x16_t <b><b>vuzp2q_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2_p16" type="checkbox"><label for="vuzp2_p16"><div>poly16x4_t <b><b>vuzp2_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vuzp2q_p16" type="checkbox"><label for="vuzp2q_p16"><div>poly16x8_t <b><b>vuzp2q_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_s8" type="checkbox"><label for="vtrn1_s8"><div>int8x8_t <b><b>vtrn1_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_s8" type="checkbox"><label for="vtrn1q_s8"><div>int8x16_t <b><b>vtrn1q_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_s16" type="checkbox"><label for="vtrn1_s16"><div>int16x4_t <b><b>vtrn1_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_s16" type="checkbox"><label for="vtrn1q_s16"><div>int16x8_t <b><b>vtrn1q_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_s32" type="checkbox"><label for="vtrn1_s32"><div>int32x2_t <b><b>vtrn1_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_s32" type="checkbox"><label for="vtrn1q_s32"><div>int32x4_t <b><b>vtrn1q_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_s64" type="checkbox"><label for="vtrn1q_s64"><div>int64x2_t <b><b>vtrn1q_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_u8" type="checkbox"><label for="vtrn1_u8"><div>uint8x8_t <b><b>vtrn1_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_u8" type="checkbox"><label for="vtrn1q_u8"><div>uint8x16_t <b><b>vtrn1q_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_u16" type="checkbox"><label for="vtrn1_u16"><div>uint16x4_t <b><b>vtrn1_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_u16" type="checkbox"><label for="vtrn1q_u16"><div>uint16x8_t <b><b>vtrn1q_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_u32" type="checkbox"><label for="vtrn1_u32"><div>uint32x2_t <b><b>vtrn1_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_u32" type="checkbox"><label for="vtrn1q_u32"><div>uint32x4_t <b><b>vtrn1q_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_u64" type="checkbox"><label for="vtrn1q_u64"><div>uint64x2_t <b><b>vtrn1q_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_p64" type="checkbox"><label for="vtrn1q_p64"><div>poly64x2_t <b><b>vtrn1q_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_f32" type="checkbox"><label for="vtrn1_f32"><div>float32x2_t <b><b>vtrn1_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_f32" type="checkbox"><label for="vtrn1q_f32"><div>float32x4_t <b><b>vtrn1q_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_f64" type="checkbox"><label for="vtrn1q_f64"><div>float64x2_t <b><b>vtrn1q_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_p8" type="checkbox"><label for="vtrn1_p8"><div>poly8x8_t <b><b>vtrn1_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_p8" type="checkbox"><label for="vtrn1q_p8"><div>poly8x16_t <b><b>vtrn1q_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1_p16" type="checkbox"><label for="vtrn1_p16"><div>poly16x4_t <b><b>vtrn1_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn1q_p16" type="checkbox"><label for="vtrn1q_p16"><div>poly16x8_t <b><b>vtrn1q_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&amp;FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_s8" type="checkbox"><label for="vtrn2_s8"><div>int8x8_t <b><b>vtrn2_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_s8" type="checkbox"><label for="vtrn2q_s8"><div>int8x16_t <b><b>vtrn2q_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_s16" type="checkbox"><label for="vtrn2_s16"><div>int16x4_t <b><b>vtrn2_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_s16" type="checkbox"><label for="vtrn2q_s16"><div>int16x8_t <b><b>vtrn2q_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_s32" type="checkbox"><label for="vtrn2_s32"><div>int32x2_t <b><b>vtrn2_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_s32" type="checkbox"><label for="vtrn2q_s32"><div>int32x4_t <b><b>vtrn2q_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_s64" type="checkbox"><label for="vtrn2q_s64"><div>int64x2_t <b><b>vtrn2q_s64</b></b> (int64x2_t a, int64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_u8" type="checkbox"><label for="vtrn2_u8"><div>uint8x8_t <b><b>vtrn2_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_u8" type="checkbox"><label for="vtrn2q_u8"><div>uint8x16_t <b><b>vtrn2q_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_u16" type="checkbox"><label for="vtrn2_u16"><div>uint16x4_t <b><b>vtrn2_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_u16" type="checkbox"><label for="vtrn2q_u16"><div>uint16x8_t <b><b>vtrn2q_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_u32" type="checkbox"><label for="vtrn2_u32"><div>uint32x2_t <b><b>vtrn2_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_u32" type="checkbox"><label for="vtrn2q_u32"><div>uint32x4_t <b><b>vtrn2q_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_u64" type="checkbox"><label for="vtrn2q_u64"><div>uint64x2_t <b><b>vtrn2q_u64</b></b> (uint64x2_t a, uint64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_p64" type="checkbox"><label for="vtrn2q_p64"><div>poly64x2_t <b><b>vtrn2q_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_f32" type="checkbox"><label for="vtrn2_f32"><div>float32x2_t <b><b>vtrn2_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_f32" type="checkbox"><label for="vtrn2q_f32"><div>float32x4_t <b><b>vtrn2q_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_f64" type="checkbox"><label for="vtrn2q_f64"><div>float64x2_t <b><b>vtrn2q_f64</b></b> (float64x2_t a, float64x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.2D,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_p8" type="checkbox"><label for="vtrn2_p8"><div>poly8x8_t <b><b>vtrn2_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_p8" type="checkbox"><label for="vtrn2q_p8"><div>poly8x16_t <b><b>vtrn2q_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2_p16" type="checkbox"><label for="vtrn2_p16"><div>poly16x4_t <b><b>vtrn2_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn2q_p16" type="checkbox"><label for="vtrn2q_p16"><div>poly16x8_t <b><b>vtrn2q_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtbl1_s8" type="checkbox"><label for="vtbl1_s8"><div>int8x8_t <b><b>vtbl1_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>Zeros(64):a &rarr; Vn.16B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl1_u8" type="checkbox"><label for="vtbl1_u8"><div>uint8x8_t <b><b>vtbl1_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>Zeros(64):a &rarr; Vn <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl1_p8" type="checkbox"><label for="vtbl1_p8"><div>poly8x8_t <b><b>vtbl1_p8</b></b> (poly8x8_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>Zeros(64):a &rarr; Vn <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx1_s8" type="checkbox"><label for="vtbx1_s8"><div>int8x8_t <b><b>vtbx1_s8</b></b> (int8x8_t a, int8x8_t b, int8x8_t c)<span class="right">Bitwise insert if false</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&amp;FP register into the destination SIMD&amp;FP register if the corresponding bit of the second source SIMD&amp;FP register is 0, otherwise leaves the bit in the destination register unchanged.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/movi-move-immediate-vector">MOVI</a> Vtmp.8B,#8
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vtmp.8B,Vm.8B,Vtmp.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vtmp1.8B,{Vn.16B},Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bif-bitwise-insert-if-false">BIF</a> Vd.8B,Vtmp1.8B,Vtmp.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Zeros(64):b &rarr; Vn <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+operand3 = NOT(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m]);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx1_u8" type="checkbox"><label for="vtbx1_u8"><div>uint8x8_t <b><b>vtbx1_u8</b></b> (uint8x8_t a, uint8x8_t b, uint8x8_t c)<span class="right">Bitwise insert if false</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&amp;FP register into the destination SIMD&amp;FP register if the corresponding bit of the second source SIMD&amp;FP register is 0, otherwise leaves the bit in the destination register unchanged.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/movi-move-immediate-vector">MOVI</a> Vtmp.8B,#8
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vtmp.8B,Vm.8B,Vtmp.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vtmp1.8B,{Vn.16B},Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bif-bitwise-insert-if-false">BIF</a> Vd.8B,Vtmp1.8B,Vtmp.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Zeros(64):b &rarr; Vn <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+operand3 = NOT(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m]);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx1_p8" type="checkbox"><label for="vtbx1_p8"><div>poly8x8_t <b><b>vtbx1_p8</b></b> (poly8x8_t a, poly8x8_t b, uint8x8_t c)<span class="right">Bitwise insert if false</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&amp;FP register into the destination SIMD&amp;FP register if the corresponding bit of the second source SIMD&amp;FP register is 0, otherwise leaves the bit in the destination register unchanged.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/movi-move-immediate-vector">MOVI</a> Vtmp.8B,#8
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vtmp.8B,Vm.8B,Vtmp.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vtmp1.8B,{Vn.16B},Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bif-bitwise-insert-if-false">BIF</a> Vd.8B,Vtmp1.8B,Vtmp.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Zeros(64):b &rarr; Vn <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+operand3 = NOT(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m]);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl2_s8" type="checkbox"><label for="vtbl2_s8"><div>int8x8_t <b><b>vtbl2_s8</b></b> (int8x8x2_t a, int8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl2_u8" type="checkbox"><label for="vtbl2_u8"><div>uint8x8_t <b><b>vtbl2_u8</b></b> (uint8x8x2_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl2_p8" type="checkbox"><label for="vtbl2_p8"><div>poly8x8_t <b><b>vtbl2_p8</b></b> (poly8x8x2_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl3_s8" type="checkbox"><label for="vtbl3_s8"><div>int8x8_t <b><b>vtbl3_s8</b></b> (int8x8x3_t a, int8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+Zeros(64):a.val[2] &rarr; Vn+1 <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl3_u8" type="checkbox"><label for="vtbl3_u8"><div>uint8x8_t <b><b>vtbl3_u8</b></b> (uint8x8x3_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+Zeros(64):a.val[2] &rarr; Vn+1 <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl3_p8" type="checkbox"><label for="vtbl3_p8"><div>poly8x8_t <b><b>vtbl3_p8</b></b> (poly8x8x3_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+Zeros(64):a.val[2] &rarr; Vn+1 <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl4_s8" type="checkbox"><label for="vtbl4_s8"><div>int8x8_t <b><b>vtbl4_s8</b></b> (int8x8x4_t a, int8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+a.val[3]:a.val[2] &rarr; Vn+1 <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl4_u8" type="checkbox"><label for="vtbl4_u8"><div>uint8x8_t <b><b>vtbl4_u8</b></b> (uint8x8x4_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+a.val[3]:a.val[2] &rarr; Vn+1 <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbl4_p8" type="checkbox"><label for="vtbl4_p8"><div>poly8x8_t <b><b>vtbl4_p8</b></b> (poly8x8x4_t a, uint8x8_t b)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a.val[1]:a.val[0] &rarr; Vn <br />
+a.val[3]:a.val[2] &rarr; Vn+1 <br />
+b &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx2_s8" type="checkbox"><label for="vtbx2_s8"><div>int8x8_t <b><b>vtbx2_s8</b></b> (int8x8_t a, int8x8x2_t b, int8x8_t c)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+b.val[1]:b.val[0] &rarr; Vn <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx2_u8" type="checkbox"><label for="vtbx2_u8"><div>uint8x8_t <b><b>vtbx2_u8</b></b> (uint8x8_t a, uint8x8x2_t b, uint8x8_t c)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx2_p8" type="checkbox"><label for="vtbx2_p8"><div>poly8x8_t <b><b>vtbx2_p8</b></b> (poly8x8_t a, poly8x8x2_t b, uint8x8_t c)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx3_s8" type="checkbox"><label for="vtbx3_s8"><div>int8x8_t <b><b>vtbx3_s8</b></b> (int8x8_t a, int8x8x3_t b, int8x8_t c)<span class="right">Bitwise insert if false</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&amp;FP register into the destination SIMD&amp;FP register if the corresponding bit of the second source SIMD&amp;FP register is 0, otherwise leaves the bit in the destination register unchanged.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/movi-move-immediate-vector">MOVI</a> Vtmp.8B,#24
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vtmp.8B,Vm.8B,Vtmp.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bif-bitwise-insert-if-false">BIF</a> Vd.8B,Vtmp1.8B,Vtmp.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+Vn+1 &rarr; Zeros(64):b.val[2] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+operand3 = NOT(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m]);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx3_u8" type="checkbox"><label for="vtbx3_u8"><div>uint8x8_t <b><b>vtbx3_u8</b></b> (uint8x8_t a, uint8x8x3_t b, uint8x8_t c)<span class="right">Bitwise insert if false</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&amp;FP register into the destination SIMD&amp;FP register if the corresponding bit of the second source SIMD&amp;FP register is 0, otherwise leaves the bit in the destination register unchanged.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/movi-move-immediate-vector">MOVI</a> Vtmp.8B,#24
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vtmp.8B,Vm.8B,Vtmp.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bif-bitwise-insert-if-false">BIF</a> Vd.8B,Vtmp1.8B,Vtmp.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+Vn+1 &rarr; Zeros(64):b.val[2] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+operand3 = NOT(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m]);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx3_p8" type="checkbox"><label for="vtbx3_p8"><div>poly8x8_t <b><b>vtbx3_p8</b></b> (poly8x8_t a, poly8x8x3_t b, uint8x8_t c)<span class="right">Bitwise insert if false</span></div></label><article>      <h4>Description</h4><p><p class="aml">Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&amp;FP register into the destination SIMD&amp;FP register if the corresponding bit of the second source SIMD&amp;FP register is 0, otherwise leaves the bit in the destination register unchanged.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/movi-move-immediate-vector">MOVI</a> Vtmp.8B,#24
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/cmhs-register-compare-unsigned-higher-or-same-vector">CMHS</a> Vtmp.8B,Vm.8B,Vtmp.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/bif-bitwise-insert-if-false">BIF</a> Vd.8B,Vtmp1.8B,Vtmp.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+Vn+1 &rarr; Zeros(64):b.val[2] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+operand3 = NOT(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m]);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx4_s8" type="checkbox"><label for="vtbx4_s8"><div>int8x8_t <b><b>vtbx4_s8</b></b> (int8x8_t a, int8x8x4_t b, int8x8_t c)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+Vn+1 &rarr; b.val[3]:b.val[2] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx4_u8" type="checkbox"><label for="vtbx4_u8"><div>uint8x8_t <b><b>vtbx4_u8</b></b> (uint8x8_t a, uint8x8x4_t b, uint8x8_t c)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+Vn+1 &rarr; b.val[3]:b.val[2] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtbx4_p8" type="checkbox"><label for="vtbx4_p8"><div>poly8x8_t <b><b>vtbx4_p8</b></b> (poly8x8_t a, poly8x8x4_t b, uint8x8_t c)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd <br />
+Vn &rarr; b.val[1]:b.val[0] <br />
+Vn+1 &rarr; b.val[3]:b.val[2] <br />
+c &rarr; Vm </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl1_s8" type="checkbox"><label for="vqtbl1_s8"><div>int8x8_t <b><b>vqtbl1_s8</b></b> (int8x16_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t &rarr; Vn.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl1q_s8" type="checkbox"><label for="vqtbl1q_s8"><div>int8x16_t <b><b>vqtbl1q_s8</b></b> (int8x16_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t &rarr; Vn.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl1_u8" type="checkbox"><label for="vqtbl1_u8"><div>uint8x8_t <b><b>vqtbl1_u8</b></b> (uint8x16_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t &rarr; Vn.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl1q_u8" type="checkbox"><label for="vqtbl1q_u8"><div>uint8x16_t <b><b>vqtbl1q_u8</b></b> (uint8x16_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t &rarr; Vn.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl1_p8" type="checkbox"><label for="vqtbl1_p8"><div>poly8x8_t <b><b>vqtbl1_p8</b></b> (poly8x16_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t &rarr; Vn.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl1q_p8" type="checkbox"><label for="vqtbl1q_p8"><div>poly8x16_t <b><b>vqtbl1q_p8</b></b> (poly8x16_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t &rarr; Vn.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx1_s8" type="checkbox"><label for="vqtbx1_s8"><div>int8x8_t <b><b>vqtbx1_s8</b></b> (int8x8_t a, int8x16_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t &rarr; Vn.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx1q_s8" type="checkbox"><label for="vqtbx1q_s8"><div>int8x16_t <b><b>vqtbx1q_s8</b></b> (int8x16_t a, int8x16_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t &rarr; Vn.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx1_u8" type="checkbox"><label for="vqtbx1_u8"><div>uint8x8_t <b><b>vqtbx1_u8</b></b> (uint8x8_t a, uint8x16_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t &rarr; Vn.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx1q_u8" type="checkbox"><label for="vqtbx1q_u8"><div>uint8x16_t <b><b>vqtbx1q_u8</b></b> (uint8x16_t a, uint8x16_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t &rarr; Vn.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx1_p8" type="checkbox"><label for="vqtbx1_p8"><div>poly8x8_t <b><b>vqtbx1_p8</b></b> (poly8x8_t a, poly8x16_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t &rarr; Vn.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx1q_p8" type="checkbox"><label for="vqtbx1q_p8"><div>poly8x16_t <b><b>vqtbx1q_p8</b></b> (poly8x16_t a, poly8x16_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t &rarr; Vn.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl2_s8" type="checkbox"><label for="vqtbl2_s8"><div>int8x8_t <b><b>vqtbl2_s8</b></b> (int8x16x2_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl2q_s8" type="checkbox"><label for="vqtbl2q_s8"><div>int8x16_t <b><b>vqtbl2q_s8</b></b> (int8x16x2_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl2_u8" type="checkbox"><label for="vqtbl2_u8"><div>uint8x8_t <b><b>vqtbl2_u8</b></b> (uint8x16x2_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl2q_u8" type="checkbox"><label for="vqtbl2q_u8"><div>uint8x16_t <b><b>vqtbl2q_u8</b></b> (uint8x16x2_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl2_p8" type="checkbox"><label for="vqtbl2_p8"><div>poly8x8_t <b><b>vqtbl2_p8</b></b> (poly8x16x2_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl2q_p8" type="checkbox"><label for="vqtbl2q_p8"><div>poly8x16_t <b><b>vqtbl2q_p8</b></b> (poly8x16x2_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl3_s8" type="checkbox"><label for="vqtbl3_s8"><div>int8x8_t <b><b>vqtbl3_s8</b></b> (int8x16x3_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl3q_s8" type="checkbox"><label for="vqtbl3q_s8"><div>int8x16_t <b><b>vqtbl3q_s8</b></b> (int8x16x3_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl3_u8" type="checkbox"><label for="vqtbl3_u8"><div>uint8x8_t <b><b>vqtbl3_u8</b></b> (uint8x16x3_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl3q_u8" type="checkbox"><label for="vqtbl3q_u8"><div>uint8x16_t <b><b>vqtbl3q_u8</b></b> (uint8x16x3_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl3_p8" type="checkbox"><label for="vqtbl3_p8"><div>poly8x8_t <b><b>vqtbl3_p8</b></b> (poly8x16x3_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl3q_p8" type="checkbox"><label for="vqtbl3q_p8"><div>poly8x16_t <b><b>vqtbl3q_p8</b></b> (poly8x16x3_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl4_s8" type="checkbox"><label for="vqtbl4_s8"><div>int8x8_t <b><b>vqtbl4_s8</b></b> (int8x16x4_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl4q_s8" type="checkbox"><label for="vqtbl4q_s8"><div>int8x16_t <b><b>vqtbl4q_s8</b></b> (int8x16x4_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl4_u8" type="checkbox"><label for="vqtbl4_u8"><div>uint8x8_t <b><b>vqtbl4_u8</b></b> (uint8x16x4_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl4q_u8" type="checkbox"><label for="vqtbl4q_u8"><div>uint8x16_t <b><b>vqtbl4q_u8</b></b> (uint8x16x4_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl4_p8" type="checkbox"><label for="vqtbl4_p8"><div>poly8x8_t <b><b>vqtbl4_p8</b></b> (poly8x16x4_t t, uint8x8_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbl4q_p8" type="checkbox"><label for="vqtbl4q_p8"><div>poly8x16_t <b><b>vqtbl4q_p8</b></b> (poly8x16x4_t t, uint8x16_t idx)<span class="right">Table vector lookup</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbl-table-vector-lookup">TBL</a> Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx2_s8" type="checkbox"><label for="vqtbx2_s8"><div>int8x8_t <b><b>vqtbx2_s8</b></b> (int8x8_t a, int8x16x2_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx2q_s8" type="checkbox"><label for="vqtbx2q_s8"><div>int8x16_t <b><b>vqtbx2q_s8</b></b> (int8x16_t a, int8x16x2_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx2_u8" type="checkbox"><label for="vqtbx2_u8"><div>uint8x8_t <b><b>vqtbx2_u8</b></b> (uint8x8_t a, uint8x16x2_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx2q_u8" type="checkbox"><label for="vqtbx2q_u8"><div>uint8x16_t <b><b>vqtbx2q_u8</b></b> (uint8x16_t a, uint8x16x2_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx2_p8" type="checkbox"><label for="vqtbx2_p8"><div>poly8x8_t <b><b>vqtbx2_p8</b></b> (poly8x8_t a, poly8x16x2_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx2q_p8" type="checkbox"><label for="vqtbx2q_p8"><div>poly8x16_t <b><b>vqtbx2q_p8</b></b> (poly8x16_t a, poly8x16x2_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx3_s8" type="checkbox"><label for="vqtbx3_s8"><div>int8x8_t <b><b>vqtbx3_s8</b></b> (int8x8_t a, int8x16x3_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx3q_s8" type="checkbox"><label for="vqtbx3q_s8"><div>int8x16_t <b><b>vqtbx3q_s8</b></b> (int8x16_t a, int8x16x3_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx3_u8" type="checkbox"><label for="vqtbx3_u8"><div>uint8x8_t <b><b>vqtbx3_u8</b></b> (uint8x8_t a, uint8x16x3_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx3q_u8" type="checkbox"><label for="vqtbx3q_u8"><div>uint8x16_t <b><b>vqtbx3q_u8</b></b> (uint8x16_t a, uint8x16x3_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx3_p8" type="checkbox"><label for="vqtbx3_p8"><div>poly8x8_t <b><b>vqtbx3_p8</b></b> (poly8x8_t a, poly8x16x3_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx3q_p8" type="checkbox"><label for="vqtbx3q_p8"><div>poly8x16_t <b><b>vqtbx3q_p8</b></b> (poly8x16_t a, poly8x16x3_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx4_s8" type="checkbox"><label for="vqtbx4_s8"><div>int8x8_t <b><b>vqtbx4_s8</b></b> (int8x8_t a, int8x16x4_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx4q_s8" type="checkbox"><label for="vqtbx4q_s8"><div>int8x16_t <b><b>vqtbx4q_s8</b></b> (int8x16_t a, int8x16x4_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx4_u8" type="checkbox"><label for="vqtbx4_u8"><div>uint8x8_t <b><b>vqtbx4_u8</b></b> (uint8x8_t a, uint8x16x4_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx4q_u8" type="checkbox"><label for="vqtbx4q_u8"><div>uint8x16_t <b><b>vqtbx4q_u8</b></b> (uint8x16_t a, uint8x16x4_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx4_p8" type="checkbox"><label for="vqtbx4_p8"><div>poly8x8_t <b><b>vqtbx4_p8</b></b> (poly8x8_t a, poly8x16x4_t t, uint8x8_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vqtbx4q_p8" type="checkbox"><label for="vqtbx4q_p8"><div>poly8x16_t <b><b>vqtbx4q_p8</b></b> (poly8x16_t a, poly8x16x4_t t, uint8x16_t idx)<span class="right">Table vector lookup extension</span></div></label><article>      <h4>Description</h4><p><p class="aml">Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&amp;FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&amp;FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&amp;FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/tbx-table-vector-lookup-extension">TBX</a> Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B <br />
+t.val[0] &rarr; Vn.16B <br />
+t.val[1] &rarr; Vn+1.16B <br />
+t.val[2] &rarr; Vn+2.16B <br />
+t.val[3] &rarr; Vn+3.16B <br />
+idx &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) indices = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128*regs) table = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table&lt;128*i+127:128*i&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.0" title="function: bits(N) Zeros()">Zeros</a>() else <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+for i = 0 to elements-1
+    index = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[indices, i, 8]);
+    if index &lt; 16 * regs then
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, i, 8] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[table, index, 8];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_u8" type="checkbox"><label for="vget_lane_u8"><div>uint8_t <b><b>vget_lane_u8</b></b> (uint8x8_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_u16" type="checkbox"><label for="vget_lane_u16"><div>uint16_t <b><b>vget_lane_u16</b></b> (uint16x4_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_u32" type="checkbox"><label for="vget_lane_u32"><div>uint32_t <b><b>vget_lane_u32</b></b> (uint32x2_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_u64" type="checkbox"><label for="vget_lane_u64"><div>uint64_t <b><b>vget_lane_u64</b></b> (uint64x1_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_p64" type="checkbox"><label for="vget_lane_p64"><div>poly64_t <b><b>vget_lane_p64</b></b> (poly64x1_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_s8" type="checkbox"><label for="vget_lane_s8"><div>int8_t <b><b>vget_lane_s8</b></b> (int8x8_t v, const int lane)<span class="right">Signed move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&amp;FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smov-signed-move-vector-element-to-general-purpose-register">SMOV</a> Rd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignExtend.2" title="function: bits(N) SignExtend(bits(M) x, integer N)">SignExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_s16" type="checkbox"><label for="vget_lane_s16"><div>int16_t <b><b>vget_lane_s16</b></b> (int16x4_t v, const int lane)<span class="right">Signed move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&amp;FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smov-signed-move-vector-element-to-general-purpose-register">SMOV</a> Rd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignExtend.2" title="function: bits(N) SignExtend(bits(M) x, integer N)">SignExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_s32" type="checkbox"><label for="vget_lane_s32"><div>int32_t <b><b>vget_lane_s32</b></b> (int32x2_t v, const int lane)<span class="right">Signed move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&amp;FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smov-signed-move-vector-element-to-general-purpose-register">SMOV</a> Rd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignExtend.2" title="function: bits(N) SignExtend(bits(M) x, integer N)">SignExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_s64" type="checkbox"><label for="vget_lane_s64"><div>int64_t <b><b>vget_lane_s64</b></b> (int64x1_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_p8" type="checkbox"><label for="vget_lane_p8"><div>poly8_t <b><b>vget_lane_p8</b></b> (poly8x8_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_p16" type="checkbox"><label for="vget_lane_p16"><div>poly16_t <b><b>vget_lane_p16</b></b> (poly16x4_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_f32" type="checkbox"><label for="vget_lane_f32"><div>float32_t <b><b>vget_lane_f32</b></b> (float32x2_t v, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_f64" type="checkbox"><label for="vget_lane_f64"><div>float64_t <b><b>vget_lane_f64</b></b> (float64x1_t v, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_u8" type="checkbox"><label for="vgetq_lane_u8"><div>uint8_t <b><b>vgetq_lane_u8</b></b> (uint8x16_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_u16" type="checkbox"><label for="vgetq_lane_u16"><div>uint16_t <b><b>vgetq_lane_u16</b></b> (uint16x8_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_u32" type="checkbox"><label for="vgetq_lane_u32"><div>uint32_t <b><b>vgetq_lane_u32</b></b> (uint32x4_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_u64" type="checkbox"><label for="vgetq_lane_u64"><div>uint64_t <b><b>vgetq_lane_u64</b></b> (uint64x2_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_p64" type="checkbox"><label for="vgetq_lane_p64"><div>poly64_t <b><b>vgetq_lane_p64</b></b> (poly64x2_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_s8" type="checkbox"><label for="vgetq_lane_s8"><div>int8_t <b><b>vgetq_lane_s8</b></b> (int8x16_t v, const int lane)<span class="right">Signed move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&amp;FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smov-signed-move-vector-element-to-general-purpose-register">SMOV</a> Rd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignExtend.2" title="function: bits(N) SignExtend(bits(M) x, integer N)">SignExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_s16" type="checkbox"><label for="vgetq_lane_s16"><div>int16_t <b><b>vgetq_lane_s16</b></b> (int16x8_t v, const int lane)<span class="right">Signed move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&amp;FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smov-signed-move-vector-element-to-general-purpose-register">SMOV</a> Rd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignExtend.2" title="function: bits(N) SignExtend(bits(M) x, integer N)">SignExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_s32" type="checkbox"><label for="vgetq_lane_s32"><div>int32_t <b><b>vgetq_lane_s32</b></b> (int32x4_t v, const int lane)<span class="right">Signed move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&amp;FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/smov-signed-move-vector-element-to-general-purpose-register">SMOV</a> Rd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SignExtend.2" title="function: bits(N) SignExtend(bits(M) x, integer N)">SignExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_s64" type="checkbox"><label for="vgetq_lane_s64"><div>int64_t <b><b>vgetq_lane_s64</b></b> (int64x2_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_p8" type="checkbox"><label for="vgetq_lane_p8"><div>poly8_t <b><b>vgetq_lane_p8</b></b> (poly8x16_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.B[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_p16" type="checkbox"><label for="vgetq_lane_p16"><div>poly16_t <b><b>vgetq_lane_p16</b></b> (poly16x8_t v, const int lane)<span class="right">Unsigned move vector element to general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&amp;FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/umov-unsigned-move-vector-element-to-general-purpose-register">UMOV</a> Rd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Rd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(idxdsize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ZeroExtend.2" title="function: bits(N) ZeroExtend(bits(M) x, integer N)">ZeroExtend</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, index, esize], datasize);</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vget_lane_f16" type="checkbox"><label for="vget_lane_f16"><div>float16_t <b><b>vget_lane_f16</b></b> (float16x4_t v, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_f16" type="checkbox"><label for="vgetq_lane_f16"><div>float16_t <b><b>vgetq_lane_f16</b></b> (float16x8_t v, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Hd,Vn.H[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Hd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_f32" type="checkbox"><label for="vgetq_lane_f32"><div>float32_t <b><b>vgetq_lane_f32</b></b> (float32x4_t v, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Sd,Vn.S[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vgetq_lane_f64" type="checkbox"><label for="vgetq_lane_f64"><div>float64_t <b><b>vgetq_lane_f64</b></b> (float64x2_t v, const int lane)<span class="right">Duplicate general-purpose register to vector</span></div></label><article>      <h4>Description</h4><p><p class="aml">Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/dup-general-duplicate-general-purpose-register-to-vector">DUP</a> Dd,Vn.D[lane]
+</pre>      <h4>Argument Preparation</h4><pre>v &rarr; Vn.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_u8" type="checkbox"><label for="vset_lane_u8"><div>uint8x8_t <b><b>vset_lane_u8</b></b> (uint8_t a, uint8x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_u16" type="checkbox"><label for="vset_lane_u16"><div>uint16x4_t <b><b>vset_lane_u16</b></b> (uint16_t a, uint16x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_u32" type="checkbox"><label for="vset_lane_u32"><div>uint32x2_t <b><b>vset_lane_u32</b></b> (uint32_t a, uint32x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_u64" type="checkbox"><label for="vset_lane_u64"><div>uint64x1_t <b><b>vset_lane_u64</b></b> (uint64_t a, uint64x1_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_p64" type="checkbox"><label for="vset_lane_p64"><div>poly64x1_t <b><b>vset_lane_p64</b></b> (poly64_t a, poly64x1_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_s8" type="checkbox"><label for="vset_lane_s8"><div>int8x8_t <b><b>vset_lane_s8</b></b> (int8_t a, int8x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_s16" type="checkbox"><label for="vset_lane_s16"><div>int16x4_t <b><b>vset_lane_s16</b></b> (int16_t a, int16x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_s32" type="checkbox"><label for="vset_lane_s32"><div>int32x2_t <b><b>vset_lane_s32</b></b> (int32_t a, int32x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_s64" type="checkbox"><label for="vset_lane_s64"><div>int64x1_t <b><b>vset_lane_s64</b></b> (int64_t a, int64x1_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_p8" type="checkbox"><label for="vset_lane_p8"><div>poly8x8_t <b><b>vset_lane_p8</b></b> (poly8_t a, poly8x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.8B <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_p16" type="checkbox"><label for="vset_lane_p16"><div>poly16x4_t <b><b>vset_lane_p16</b></b> (poly16_t a, poly16x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_f16" type="checkbox"><label for="vset_lane_f16"><div>float16x4_t <b><b>vset_lane_f16</b></b> (float16_t a, float16x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Vn.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; VnH <br />
+v &rarr; Vd.4H <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_f16" type="checkbox"><label for="vsetq_lane_f16"><div>float16x8_t <b><b>vsetq_lane_f16</b></b> (float16_t a, float16x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Vn.H[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; VnH <br />
+v &rarr; Vd.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_f32" type="checkbox"><label for="vset_lane_f32"><div>float32x2_t <b><b>vset_lane_f32</b></b> (float32_t a, float32x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2S <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vset_lane_f64" type="checkbox"><label for="vset_lane_f64"><div>float64x1_t <b><b>vset_lane_f64</b></b> (float64_t a, float64x1_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.1D <br />
+0 &lt;= lane &lt;= 0 </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_u8" type="checkbox"><label for="vsetq_lane_u8"><div>uint8x16_t <b><b>vsetq_lane_u8</b></b> (uint8_t a, uint8x16_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_u16" type="checkbox"><label for="vsetq_lane_u16"><div>uint16x8_t <b><b>vsetq_lane_u16</b></b> (uint16_t a, uint16x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_u32" type="checkbox"><label for="vsetq_lane_u32"><div>uint32x4_t <b><b>vsetq_lane_u32</b></b> (uint32_t a, uint32x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_u64" type="checkbox"><label for="vsetq_lane_u64"><div>uint64x2_t <b><b>vsetq_lane_u64</b></b> (uint64_t a, uint64x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_p64" type="checkbox"><label for="vsetq_lane_p64"><div>poly64x2_t <b><b>vsetq_lane_p64</b></b> (poly64_t a, poly64x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_s8" type="checkbox"><label for="vsetq_lane_s8"><div>int8x16_t <b><b>vsetq_lane_s8</b></b> (int8_t a, int8x16_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_s16" type="checkbox"><label for="vsetq_lane_s16"><div>int16x8_t <b><b>vsetq_lane_s16</b></b> (int16_t a, int16x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_s32" type="checkbox"><label for="vsetq_lane_s32"><div>int32x4_t <b><b>vsetq_lane_s32</b></b> (int32_t a, int32x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_s64" type="checkbox"><label for="vsetq_lane_s64"><div>int64x2_t <b><b>vsetq_lane_s64</b></b> (int64_t a, int64x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_p8" type="checkbox"><label for="vsetq_lane_p8"><div>poly8x16_t <b><b>vsetq_lane_p8</b></b> (poly8_t a, poly8x16_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.B[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.16B <br />
+0 &lt;= lane &lt;= 15 </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_p16" type="checkbox"><label for="vsetq_lane_p16"><div>poly16x8_t <b><b>vsetq_lane_p16</b></b> (poly16_t a, poly16x8_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.H[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.8H <br />
+0 &lt;= lane &lt;= 7 </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_f32" type="checkbox"><label for="vsetq_lane_f32"><div>float32x4_t <b><b>vsetq_lane_f32</b></b> (float32_t a, float32x4_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.S[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.4S <br />
+0 &lt;= lane &lt;= 3 </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsetq_lane_f64" type="checkbox"><label for="vsetq_lane_f64"><div>float64x2_t <b><b>vsetq_lane_f64</b></b> (float64_t a, float64x2_t v, const int lane)<span class="right">Insert vector element from general-purpose register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ins-general-insert-vector-element-from-general-purpose-register">INS</a> Vd.D[lane],Rn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Rn <br />
+v &rarr; Vd.2D <br />
+0 &lt;= lane &lt;= 1 </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(esize) element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+bits(128) result;
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, index, esize] = element;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpxs_f32" type="checkbox"><label for="vrecpxs_f32"><div>float32_t <b><b>vrecpxs_f32</b></b> (float32_t a)<span class="right">Floating-point reciprocal exponent</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpx-floating-point-reciprocal-exponent-scalar">FRECPX</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecpX.2" title="function: bits(N) FPRecpX(bits(N) op, FPCRType fpcr)">FPRecpX</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vrecpxd_f64" type="checkbox"><label for="vrecpxd_f64"><div>float64_t <b><b>vrecpxd_f64</b></b> (float64_t a)<span class="right">Floating-point reciprocal exponent</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&amp;FP register, places the result in a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/frecpx-floating-point-reciprocal-exponent-scalar">FRECPX</a> Dd,Dn
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Dn </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPRecpX.2" title="function: bits(N) FPRecpX(bits(N) op, FPCRType fpcr)">FPRecpX</a>(element, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_n_f32" type="checkbox"><label for="vfma_n_f32"><div>float32x2_t <b><b>vfma_n_f32</b></b> (float32x2_t a, float32x2_t b, float32_t n)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+n &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_n_f32" type="checkbox"><label for="vfmaq_n_f32"><div>float32x4_t <b><b>vfmaq_n_f32</b></b> (float32x4_t a, float32x4_t b, float32_t n)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+n &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vfms_n_f32" type="checkbox"><label for="vfms_n_f32"><div>float32x2_t <b><b>vfms_n_f32</b></b> (float32x2_t a, float32x2_t b, float32_t n)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2S,Vn.2S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S <br />
+b &rarr; Vn.2S <br />
+n &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_n_f32" type="checkbox"><label for="vfmsq_n_f32"><div>float32x4_t <b><b>vfmsq_n_f32</b></b> (float32x4_t a, float32x4_t b, float32_t n)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.4S,Vn.4S,Vm.S[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S <br />
+b &rarr; Vn.4S <br />
+n &rarr; Vm.S[0] </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfma_n_f64" type="checkbox"><label for="vfma_n_f64"><div>float64x1_t <b><b>vfma_n_f64</b></b> (float64x1_t a, float64x1_t b, float64_t n)<span class="right">Floating-point fused multiply-add</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&amp;FP source registers, adds the product to the value of the third SIMD&amp;FP source register, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmadd-floating-point-fused-multiply-add-scalar">FMADD</a> Dd,Dn,Dm,Da
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Da <br />
+b &rarr; Dn <br />
+n &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) result;
+bits(datasize) operanda = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[a];
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(operanda, operand1, operand2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmaq_n_f64" type="checkbox"><label for="vfmaq_n_f64"><div>float64x2_t <b><b>vfmaq_n_f64</b></b> (float64x2_t a, float64x2_t b, float64_t n)<span class="right">Floating-point fused multiply-add to accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, adds the product to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmla-vector-floating-point-fused-multiply-add-to-accumulator-vector">FMLA</a> Vd.2D,Vn.2D,Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+n &rarr; Vm.D[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfms_n_f64" type="checkbox"><label for="vfms_n_f64"><div>float64x1_t <b><b>vfms_n_f64</b></b> (float64x1_t a, float64x1_t b, float64_t n)<span class="right">Floating-point fused multiply-subtract</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&amp;FP source registers, negates the product, adds that to the value of the third SIMD&amp;FP source register, and writes the result to the SIMD&amp;FP destination register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmsub-floating-point-fused-multiply-subtract-scalar">FMSUB</a> Dd,Dn,Dm,Da
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Da <br />
+b &rarr; Dn <br />
+n &rarr; Dm </pre>      <h4>Results</h4>      <pre>Dd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) result;
+bits(datasize) operanda = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[a];
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+
+operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(operand1);
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(operanda, operand1, operand2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vfmsq_n_f64" type="checkbox"><label for="vfmsq_n_f64"><div>float64x2_t <b><b>vfmsq_n_f64</b></b> (float64x2_t a, float64x2_t b, float64_t n)<span class="right">Floating-point fused multiply-subtract from accumulator</span></div></label><article>      <h4>Description</h4><p><p class="aml">Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&amp;FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&amp;FP register, and writes the result to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/fmls-vector-floating-point-fused-multiply-subtract-from-accumulator-vector">FMLS</a> Vd.2D,Vn.2D,Vm.D[0]
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D <br />
+b &rarr; Vn.2D <br />
+n &rarr; Vm.D[0] </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    if sub_op then element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPNeg.1" title="function: bits(N) FPNeg(bits(N) op)">FPNeg</a>(element1);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.FPMulAdd.4" title="function: bits(N) FPMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr)">FPMulAdd</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, esize], element1, element2, FPCR);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_s8" type="checkbox"><label for="vtrn_s8"><div>int8x8x2_t <b><b>vtrn_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_s16" type="checkbox"><label for="vtrn_s16"><div>int16x4x2_t <b><b>vtrn_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_u8" type="checkbox"><label for="vtrn_u8"><div>uint8x8x2_t <b><b>vtrn_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_u16" type="checkbox"><label for="vtrn_u16"><div>uint16x4x2_t <b><b>vtrn_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_p8" type="checkbox"><label for="vtrn_p8"><div>poly8x8x2_t <b><b>vtrn_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_p16" type="checkbox"><label for="vtrn_p16"><div>poly16x4x2_t <b><b>vtrn_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_s32" type="checkbox"><label for="vtrn_s32"><div>int32x2x2_t <b><b>vtrn_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_f32" type="checkbox"><label for="vtrn_f32"><div>float32x2x2_t <b><b>vtrn_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrn_u32" type="checkbox"><label for="vtrn_u32"><div>uint32x2x2_t <b><b>vtrn_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_s8" type="checkbox"><label for="vtrnq_s8"><div>int8x16x2_t <b><b>vtrnq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_s16" type="checkbox"><label for="vtrnq_s16"><div>int16x8x2_t <b><b>vtrnq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_s32" type="checkbox"><label for="vtrnq_s32"><div>int32x4x2_t <b><b>vtrnq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_f32" type="checkbox"><label for="vtrnq_f32"><div>float32x4x2_t <b><b>vtrnq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_u8" type="checkbox"><label for="vtrnq_u8"><div>uint8x16x2_t <b><b>vtrnq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_u16" type="checkbox"><label for="vtrnq_u16"><div>uint16x8x2_t <b><b>vtrnq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_u32" type="checkbox"><label for="vtrnq_u32"><div>uint32x4x2_t <b><b>vtrnq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_p8" type="checkbox"><label for="vtrnq_p8"><div>poly8x16x2_t <b><b>vtrnq_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vtrnq_p16" type="checkbox"><label for="vtrnq_p16"><div>poly16x8x2_t <b><b>vtrnq_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Transpose vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&amp;FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn1-transpose-vectors-primary">TRN1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/trn2-transpose-vectors-secondary">TRN2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*p+part, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*p+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_s8" type="checkbox"><label for="vzip_s8"><div>int8x8x2_t <b><b>vzip_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_s16" type="checkbox"><label for="vzip_s16"><div>int16x4x2_t <b><b>vzip_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_u8" type="checkbox"><label for="vzip_u8"><div>uint8x8x2_t <b><b>vzip_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_u16" type="checkbox"><label for="vzip_u16"><div>uint16x4x2_t <b><b>vzip_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_p8" type="checkbox"><label for="vzip_p8"><div>poly8x8x2_t <b><b>vzip_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_p16" type="checkbox"><label for="vzip_p16"><div>poly16x4x2_t <b><b>vzip_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_s32" type="checkbox"><label for="vzip_s32"><div>int32x2x2_t <b><b>vzip_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_f32" type="checkbox"><label for="vzip_f32"><div>float32x2x2_t <b><b>vzip_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzip_u32" type="checkbox"><label for="vzip_u32"><div>uint32x2x2_t <b><b>vzip_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_s8" type="checkbox"><label for="vzipq_s8"><div>int8x16x2_t <b><b>vzipq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_s16" type="checkbox"><label for="vzipq_s16"><div>int16x8x2_t <b><b>vzipq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_s32" type="checkbox"><label for="vzipq_s32"><div>int32x4x2_t <b><b>vzipq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_f32" type="checkbox"><label for="vzipq_f32"><div>float32x4x2_t <b><b>vzipq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_u8" type="checkbox"><label for="vzipq_u8"><div>uint8x16x2_t <b><b>vzipq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_u16" type="checkbox"><label for="vzipq_u16"><div>uint16x8x2_t <b><b>vzipq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_u32" type="checkbox"><label for="vzipq_u32"><div>uint32x4x2_t <b><b>vzipq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_p8" type="checkbox"><label for="vzipq_p8"><div>poly8x16x2_t <b><b>vzipq_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vzipq_p16" type="checkbox"><label for="vzipq_p16"><div>poly16x8x2_t <b><b>vzipq_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Zip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&amp;FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&amp;FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip1-zip-vectors-primary">ZIP1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/zip2-zip-vectors-secondary">ZIP2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+0, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, base+p, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, 2*p+1, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, base+p, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_s8" type="checkbox"><label for="vuzp_s8"><div>int8x8x2_t <b><b>vuzp_s8</b></b> (int8x8_t a, int8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_s16" type="checkbox"><label for="vuzp_s16"><div>int16x4x2_t <b><b>vuzp_s16</b></b> (int16x4_t a, int16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_s32" type="checkbox"><label for="vuzp_s32"><div>int32x2x2_t <b><b>vuzp_s32</b></b> (int32x2_t a, int32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_f32" type="checkbox"><label for="vuzp_f32"><div>float32x2x2_t <b><b>vuzp_f32</b></b> (float32x2_t a, float32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_u8" type="checkbox"><label for="vuzp_u8"><div>uint8x8x2_t <b><b>vuzp_u8</b></b> (uint8x8_t a, uint8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_u16" type="checkbox"><label for="vuzp_u16"><div>uint16x4x2_t <b><b>vuzp_u16</b></b> (uint16x4_t a, uint16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_u32" type="checkbox"><label for="vuzp_u32"><div>uint32x2x2_t <b><b>vuzp_u32</b></b> (uint32x2_t a, uint32x2_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.2S,Vn.2S,Vm.2S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.2S,Vn.2S,Vm.2S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2S <br />
+b &rarr; Vm.2S </pre>      <h4>Results</h4>      <pre>Vd1.2S &rarr; result.val[0]
+Vd2.2S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_p8" type="checkbox"><label for="vuzp_p8"><div>poly8x8x2_t <b><b>vuzp_p8</b></b> (poly8x8_t a, poly8x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.8B,Vn.8B,Vm.8B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.8B,Vn.8B,Vm.8B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8B <br />
+b &rarr; Vm.8B </pre>      <h4>Results</h4>      <pre>Vd1.8B &rarr; result.val[0]
+Vd2.8B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzp_p16" type="checkbox"><label for="vuzp_p16"><div>poly16x4x2_t <b><b>vuzp_p16</b></b> (poly16x4_t a, poly16x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.4H,Vn.4H,Vm.4H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.4H,Vn.4H,Vm.4H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4H <br />
+b &rarr; Vm.4H </pre>      <h4>Results</h4>      <pre>Vd1.4H &rarr; result.val[0]
+Vd2.4H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_s8" type="checkbox"><label for="vuzpq_s8"><div>int8x16x2_t <b><b>vuzpq_s8</b></b> (int8x16_t a, int8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_s16" type="checkbox"><label for="vuzpq_s16"><div>int16x8x2_t <b><b>vuzpq_s16</b></b> (int16x8_t a, int16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&amp;FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&amp;FP register. The intrinsic also issues UZP1 (unzip vectors, primary), which does the same for the even-numbered elements; the UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_s32" type="checkbox"><label for="vuzpq_s32"><div>int32x4x2_t <b><b>vuzpq_s32</b></b> (int32x4_t a, int32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_f32" type="checkbox"><label for="vuzpq_f32"><div>float32x4x2_t <b><b>vuzpq_f32</b></b> (float32x4_t a, float32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_u8" type="checkbox"><label for="vuzpq_u8"><div>uint8x16x2_t <b><b>vuzpq_u8</b></b> (uint8x16_t a, uint8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_u16" type="checkbox"><label for="vuzpq_u16"><div>uint16x8x2_t <b><b>vuzpq_u16</b></b> (uint16x8_t a, uint16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_u32" type="checkbox"><label for="vuzpq_u32"><div>uint32x4x2_t <b><b>vuzpq_u32</b></b> (uint32x4_t a, uint32x4_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.4S,Vn.4S,Vm.4S
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.4S <br />
+b &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd1.4S &rarr; result.val[0]
+Vd2.4S &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_p8" type="checkbox"><label for="vuzpq_p8"><div>poly8x16x2_t <b><b>vuzpq_p8</b></b> (poly8x16_t a, poly8x16_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.16B,Vn.16B,Vm.16B
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.16B,Vn.16B,Vm.16B
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.16B <br />
+b &rarr; Vm.16B </pre>      <h4>Results</h4>      <pre>Vd1.16B &rarr; result.val[0]
+Vd2.16B &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vuzpq_p16" type="checkbox"><label for="vuzpq_p16"><div>poly16x8x2_t <b><b>vuzpq_p16</b></b> (poly16x8_t a, poly16x8_t b)<span class="right">Unzip vectors</span></div></label><article>      <h4>Description</h4><p><p class="aml">Unzip vectors (primary and secondary). This intrinsic issues both UZP1 and UZP2. Each instruction reads corresponding vector elements from the two source SIMD&amp;FP registers (even-numbered elements for UZP1, odd-numbered elements for UZP2), places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to its destination SIMD&amp;FP register. The UZP1 result is returned in result.val[0] and the UZP2 result in result.val[1].</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp1-unzip-vectors-primary">UZP1</a> Vd1.8H,Vn.8H,Vm.8H
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/uzp2-unzip-vectors-secondary">UZP2</a> Vd2.8H,Vn.8H,Vm.8H
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.8H <br />
+b &rarr; Vm.8H </pre>      <h4>Results</h4>      <pre>Vd1.8H &rarr; result.val[0]
+Vd2.8H &rarr; result.val[1]
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operandl = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(datasize) operandh = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[zipped, 2*e+part, esize];
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_s8" type="checkbox"><label for="vreinterpret_s16_s8"><div>int16x4_t <b><b>vreinterpret_s16_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_s8" type="checkbox"><label for="vreinterpret_s32_s8"><div>int32x2_t <b><b>vreinterpret_s32_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_s8" type="checkbox"><label for="vreinterpret_f32_s8"><div>float32x2_t <b><b>vreinterpret_f32_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_s8" type="checkbox"><label for="vreinterpret_u8_s8"><div>uint8x8_t <b><b>vreinterpret_u8_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_s8" type="checkbox"><label for="vreinterpret_u16_s8"><div>uint16x4_t <b><b>vreinterpret_u16_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_s8" type="checkbox"><label for="vreinterpret_u32_s8"><div>uint32x2_t <b><b>vreinterpret_u32_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_s8" type="checkbox"><label for="vreinterpret_p8_s8"><div>poly8x8_t <b><b>vreinterpret_p8_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_s8" type="checkbox"><label for="vreinterpret_p16_s8"><div>poly16x4_t <b><b>vreinterpret_p16_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_s8" type="checkbox"><label for="vreinterpret_u64_s8"><div>uint64x1_t <b><b>vreinterpret_u64_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_s8" type="checkbox"><label for="vreinterpret_s64_s8"><div>int64x1_t <b><b>vreinterpret_s64_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_s8" type="checkbox"><label for="vreinterpret_f64_s8"><div>float64x1_t <b><b>vreinterpret_f64_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_s8" type="checkbox"><label for="vreinterpret_p64_s8"><div>poly64x1_t <b><b>vreinterpret_p64_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_s8" type="checkbox"><label for="vreinterpret_f16_s8"><div>float16x4_t <b><b>vreinterpret_f16_s8</b></b> (int8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_s16" type="checkbox"><label for="vreinterpret_s8_s16"><div>int8x8_t <b><b>vreinterpret_s8_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_s16" type="checkbox"><label for="vreinterpret_s32_s16"><div>int32x2_t <b><b>vreinterpret_s32_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_s16" type="checkbox"><label for="vreinterpret_f32_s16"><div>float32x2_t <b><b>vreinterpret_f32_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_s16" type="checkbox"><label for="vreinterpret_u8_s16"><div>uint8x8_t <b><b>vreinterpret_u8_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_s16" type="checkbox"><label for="vreinterpret_u16_s16"><div>uint16x4_t <b><b>vreinterpret_u16_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_s16" type="checkbox"><label for="vreinterpret_u32_s16"><div>uint32x2_t <b><b>vreinterpret_u32_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_s16" type="checkbox"><label for="vreinterpret_p8_s16"><div>poly8x8_t <b><b>vreinterpret_p8_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_s16" type="checkbox"><label for="vreinterpret_p16_s16"><div>poly16x4_t <b><b>vreinterpret_p16_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_s16" type="checkbox"><label for="vreinterpret_u64_s16"><div>uint64x1_t <b><b>vreinterpret_u64_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_s16" type="checkbox"><label for="vreinterpret_s64_s16"><div>int64x1_t <b><b>vreinterpret_s64_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_s16" type="checkbox"><label for="vreinterpret_f64_s16"><div>float64x1_t <b><b>vreinterpret_f64_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_s16" type="checkbox"><label for="vreinterpret_p64_s16"><div>poly64x1_t <b><b>vreinterpret_p64_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_s16" type="checkbox"><label for="vreinterpret_f16_s16"><div>float16x4_t <b><b>vreinterpret_f16_s16</b></b> (int16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_s32" type="checkbox"><label for="vreinterpret_s8_s32"><div>int8x8_t <b><b>vreinterpret_s8_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_s32" type="checkbox"><label for="vreinterpret_s16_s32"><div>int16x4_t <b><b>vreinterpret_s16_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_s32" type="checkbox"><label for="vreinterpret_f32_s32"><div>float32x2_t <b><b>vreinterpret_f32_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_s32" type="checkbox"><label for="vreinterpret_u8_s32"><div>uint8x8_t <b><b>vreinterpret_u8_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_s32" type="checkbox"><label for="vreinterpret_u16_s32"><div>uint16x4_t <b><b>vreinterpret_u16_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_s32" type="checkbox"><label for="vreinterpret_u32_s32"><div>uint32x2_t <b><b>vreinterpret_u32_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_s32" type="checkbox"><label for="vreinterpret_p8_s32"><div>poly8x8_t <b><b>vreinterpret_p8_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_s32" type="checkbox"><label for="vreinterpret_p16_s32"><div>poly16x4_t <b><b>vreinterpret_p16_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_s32" type="checkbox"><label for="vreinterpret_u64_s32"><div>uint64x1_t <b><b>vreinterpret_u64_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_s32" type="checkbox"><label for="vreinterpret_s64_s32"><div>int64x1_t <b><b>vreinterpret_s64_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_s32" type="checkbox"><label for="vreinterpret_f64_s32"><div>float64x1_t <b><b>vreinterpret_f64_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_s32" type="checkbox"><label for="vreinterpret_p64_s32"><div>poly64x1_t <b><b>vreinterpret_p64_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_s32" type="checkbox"><label for="vreinterpret_f16_s32"><div>float16x4_t <b><b>vreinterpret_f16_s32</b></b> (int32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_f32" type="checkbox"><label for="vreinterpret_s8_f32"><div>int8x8_t <b><b>vreinterpret_s8_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_f32" type="checkbox"><label for="vreinterpret_s16_f32"><div>int16x4_t <b><b>vreinterpret_s16_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_f32" type="checkbox"><label for="vreinterpret_s32_f32"><div>int32x2_t <b><b>vreinterpret_s32_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_f32" type="checkbox"><label for="vreinterpret_u8_f32"><div>uint8x8_t <b><b>vreinterpret_u8_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_f32" type="checkbox"><label for="vreinterpret_u16_f32"><div>uint16x4_t <b><b>vreinterpret_u16_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_f32" type="checkbox"><label for="vreinterpret_u32_f32"><div>uint32x2_t <b><b>vreinterpret_u32_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_f32" type="checkbox"><label for="vreinterpret_p8_f32"><div>poly8x8_t <b><b>vreinterpret_p8_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_f32" type="checkbox"><label for="vreinterpret_p16_f32"><div>poly16x4_t <b><b>vreinterpret_p16_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_f32" type="checkbox"><label for="vreinterpret_u64_f32"><div>uint64x1_t <b><b>vreinterpret_u64_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_f32" type="checkbox"><label for="vreinterpret_s64_f32"><div>int64x1_t <b><b>vreinterpret_s64_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_f32" type="checkbox"><label for="vreinterpret_f64_f32"><div>float64x1_t <b><b>vreinterpret_f64_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_f32" type="checkbox"><label for="vreinterpret_p64_f32"><div>poly64x1_t <b><b>vreinterpret_p64_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_f64" type="checkbox"><label for="vreinterpret_p64_f64"><div>poly64x1_t <b><b>vreinterpret_p64_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_f32" type="checkbox"><label for="vreinterpret_f16_f32"><div>float16x4_t <b><b>vreinterpret_f16_f32</b></b> (float32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_u8" type="checkbox"><label for="vreinterpret_s8_u8"><div>int8x8_t <b><b>vreinterpret_s8_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_u8" type="checkbox"><label for="vreinterpret_s16_u8"><div>int16x4_t <b><b>vreinterpret_s16_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_u8" type="checkbox"><label for="vreinterpret_s32_u8"><div>int32x2_t <b><b>vreinterpret_s32_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_u8" type="checkbox"><label for="vreinterpret_f32_u8"><div>float32x2_t <b><b>vreinterpret_f32_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_u8" type="checkbox"><label for="vreinterpret_u16_u8"><div>uint16x4_t <b><b>vreinterpret_u16_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_u8" type="checkbox"><label for="vreinterpret_u32_u8"><div>uint32x2_t <b><b>vreinterpret_u32_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_u8" type="checkbox"><label for="vreinterpret_p8_u8"><div>poly8x8_t <b><b>vreinterpret_p8_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_u8" type="checkbox"><label for="vreinterpret_p16_u8"><div>poly16x4_t <b><b>vreinterpret_p16_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_u8" type="checkbox"><label for="vreinterpret_u64_u8"><div>uint64x1_t <b><b>vreinterpret_u64_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_u8" type="checkbox"><label for="vreinterpret_s64_u8"><div>int64x1_t <b><b>vreinterpret_s64_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_u8" type="checkbox"><label for="vreinterpret_f64_u8"><div>float64x1_t <b><b>vreinterpret_f64_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_u8" type="checkbox"><label for="vreinterpret_p64_u8"><div>poly64x1_t <b><b>vreinterpret_p64_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_u8" type="checkbox"><label for="vreinterpret_f16_u8"><div>float16x4_t <b><b>vreinterpret_f16_u8</b></b> (uint8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_u16" type="checkbox"><label for="vreinterpret_s8_u16"><div>int8x8_t <b><b>vreinterpret_s8_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_u16" type="checkbox"><label for="vreinterpret_s16_u16"><div>int16x4_t <b><b>vreinterpret_s16_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_u16" type="checkbox"><label for="vreinterpret_s32_u16"><div>int32x2_t <b><b>vreinterpret_s32_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_u16" type="checkbox"><label for="vreinterpret_f32_u16"><div>float32x2_t <b><b>vreinterpret_f32_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_u16" type="checkbox"><label for="vreinterpret_u8_u16"><div>uint8x8_t <b><b>vreinterpret_u8_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_u16" type="checkbox"><label for="vreinterpret_u32_u16"><div>uint32x2_t <b><b>vreinterpret_u32_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_u16" type="checkbox"><label for="vreinterpret_p8_u16"><div>poly8x8_t <b><b>vreinterpret_p8_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_u16" type="checkbox"><label for="vreinterpret_p16_u16"><div>poly16x4_t <b><b>vreinterpret_p16_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_u16" type="checkbox"><label for="vreinterpret_u64_u16"><div>uint64x1_t <b><b>vreinterpret_u64_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_u16" type="checkbox"><label for="vreinterpret_s64_u16"><div>int64x1_t <b><b>vreinterpret_s64_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_u16" type="checkbox"><label for="vreinterpret_f64_u16"><div>float64x1_t <b><b>vreinterpret_f64_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_u16" type="checkbox"><label for="vreinterpret_p64_u16"><div>poly64x1_t <b><b>vreinterpret_p64_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_u16" type="checkbox"><label for="vreinterpret_f16_u16"><div>float16x4_t <b><b>vreinterpret_f16_u16</b></b> (uint16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_u32" type="checkbox"><label for="vreinterpret_s8_u32"><div>int8x8_t <b><b>vreinterpret_s8_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_u32" type="checkbox"><label for="vreinterpret_s16_u32"><div>int16x4_t <b><b>vreinterpret_s16_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_u32" type="checkbox"><label for="vreinterpret_s32_u32"><div>int32x2_t <b><b>vreinterpret_s32_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_u32" type="checkbox"><label for="vreinterpret_f32_u32"><div>float32x2_t <b><b>vreinterpret_f32_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_u32" type="checkbox"><label for="vreinterpret_u8_u32"><div>uint8x8_t <b><b>vreinterpret_u8_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_u32" type="checkbox"><label for="vreinterpret_u16_u32"><div>uint16x4_t <b><b>vreinterpret_u16_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_u32" type="checkbox"><label for="vreinterpret_p8_u32"><div>poly8x8_t <b><b>vreinterpret_p8_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_u32" type="checkbox"><label for="vreinterpret_p16_u32"><div>poly16x4_t <b><b>vreinterpret_p16_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_u32" type="checkbox"><label for="vreinterpret_u64_u32"><div>uint64x1_t <b><b>vreinterpret_u64_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_u32" type="checkbox"><label for="vreinterpret_s64_u32"><div>int64x1_t <b><b>vreinterpret_s64_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_u32" type="checkbox"><label for="vreinterpret_f64_u32"><div>float64x1_t <b><b>vreinterpret_f64_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_u32" type="checkbox"><label for="vreinterpret_p64_u32"><div>poly64x1_t <b><b>vreinterpret_p64_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_u32" type="checkbox"><label for="vreinterpret_f16_u32"><div>float16x4_t <b><b>vreinterpret_f16_u32</b></b> (uint32x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2S </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_p8" type="checkbox"><label for="vreinterpret_s8_p8"><div>int8x8_t <b><b>vreinterpret_s8_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_p8" type="checkbox"><label for="vreinterpret_s16_p8"><div>int16x4_t <b><b>vreinterpret_s16_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_p8" type="checkbox"><label for="vreinterpret_s32_p8"><div>int32x2_t <b><b>vreinterpret_s32_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_p8" type="checkbox"><label for="vreinterpret_f32_p8"><div>float32x2_t <b><b>vreinterpret_f32_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_p8" type="checkbox"><label for="vreinterpret_u8_p8"><div>uint8x8_t <b><b>vreinterpret_u8_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_p8" type="checkbox"><label for="vreinterpret_u16_p8"><div>uint16x4_t <b><b>vreinterpret_u16_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_p8" type="checkbox"><label for="vreinterpret_u32_p8"><div>uint32x2_t <b><b>vreinterpret_u32_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_p8" type="checkbox"><label for="vreinterpret_p16_p8"><div>poly16x4_t <b><b>vreinterpret_p16_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_p8" type="checkbox"><label for="vreinterpret_u64_p8"><div>uint64x1_t <b><b>vreinterpret_u64_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_p8" type="checkbox"><label for="vreinterpret_s64_p8"><div>int64x1_t <b><b>vreinterpret_s64_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_p8" type="checkbox"><label for="vreinterpret_f64_p8"><div>float64x1_t <b><b>vreinterpret_f64_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_p8" type="checkbox"><label for="vreinterpret_p64_p8"><div>poly64x1_t <b><b>vreinterpret_p64_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_p8" type="checkbox"><label for="vreinterpret_f16_p8"><div>float16x4_t <b><b>vreinterpret_f16_p8</b></b> (poly8x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8B </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_p16" type="checkbox"><label for="vreinterpret_s8_p16"><div>int8x8_t <b><b>vreinterpret_s8_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_p16" type="checkbox"><label for="vreinterpret_s16_p16"><div>int16x4_t <b><b>vreinterpret_s16_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_p16" type="checkbox"><label for="vreinterpret_s32_p16"><div>int32x2_t <b><b>vreinterpret_s32_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_p16" type="checkbox"><label for="vreinterpret_f32_p16"><div>float32x2_t <b><b>vreinterpret_f32_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_p16" type="checkbox"><label for="vreinterpret_u8_p16"><div>uint8x8_t <b><b>vreinterpret_u8_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_p16" type="checkbox"><label for="vreinterpret_u16_p16"><div>uint16x4_t <b><b>vreinterpret_u16_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_p16" type="checkbox"><label for="vreinterpret_u32_p16"><div>uint32x2_t <b><b>vreinterpret_u32_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_p16" type="checkbox"><label for="vreinterpret_p8_p16"><div>poly8x8_t <b><b>vreinterpret_p8_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_p16" type="checkbox"><label for="vreinterpret_u64_p16"><div>uint64x1_t <b><b>vreinterpret_u64_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_p16" type="checkbox"><label for="vreinterpret_s64_p16"><div>int64x1_t <b><b>vreinterpret_s64_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_p16" type="checkbox"><label for="vreinterpret_f64_p16"><div>float64x1_t <b><b>vreinterpret_f64_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_p16" type="checkbox"><label for="vreinterpret_p64_p16"><div>poly64x1_t <b><b>vreinterpret_p64_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_p16" type="checkbox"><label for="vreinterpret_f16_p16"><div>float16x4_t <b><b>vreinterpret_f16_p16</b></b> (poly16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_u64" type="checkbox"><label for="vreinterpret_s8_u64"><div>int8x8_t <b><b>vreinterpret_s8_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_u64" type="checkbox"><label for="vreinterpret_s16_u64"><div>int16x4_t <b><b>vreinterpret_s16_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_u64" type="checkbox"><label for="vreinterpret_s32_u64"><div>int32x2_t <b><b>vreinterpret_s32_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_u64" type="checkbox"><label for="vreinterpret_f32_u64"><div>float32x2_t <b><b>vreinterpret_f32_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_u64" type="checkbox"><label for="vreinterpret_u8_u64"><div>uint8x8_t <b><b>vreinterpret_u8_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_u64" type="checkbox"><label for="vreinterpret_u16_u64"><div>uint16x4_t <b><b>vreinterpret_u16_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_u64" type="checkbox"><label for="vreinterpret_u32_u64"><div>uint32x2_t <b><b>vreinterpret_u32_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_u64" type="checkbox"><label for="vreinterpret_p8_u64"><div>poly8x8_t <b><b>vreinterpret_p8_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_u64" type="checkbox"><label for="vreinterpret_p16_u64"><div>poly16x4_t <b><b>vreinterpret_p16_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_u64" type="checkbox"><label for="vreinterpret_s64_u64"><div>int64x1_t <b><b>vreinterpret_s64_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_u64" type="checkbox"><label for="vreinterpret_f64_u64"><div>float64x1_t <b><b>vreinterpret_f64_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_u64" type="checkbox"><label for="vreinterpret_p64_u64"><div>poly64x1_t <b><b>vreinterpret_p64_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_u64" type="checkbox"><label for="vreinterpret_f16_u64"><div>float16x4_t <b><b>vreinterpret_f16_u64</b></b> (uint64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_s64" type="checkbox"><label for="vreinterpret_s8_s64"><div>int8x8_t <b><b>vreinterpret_s8_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_s64" type="checkbox"><label for="vreinterpret_s16_s64"><div>int16x4_t <b><b>vreinterpret_s16_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_s64" type="checkbox"><label for="vreinterpret_s32_s64"><div>int32x2_t <b><b>vreinterpret_s32_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_s64" type="checkbox"><label for="vreinterpret_f32_s64"><div>float32x2_t <b><b>vreinterpret_f32_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_s64" type="checkbox"><label for="vreinterpret_u8_s64"><div>uint8x8_t <b><b>vreinterpret_u8_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_s64" type="checkbox"><label for="vreinterpret_u16_s64"><div>uint16x4_t <b><b>vreinterpret_u16_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_s64" type="checkbox"><label for="vreinterpret_u32_s64"><div>uint32x2_t <b><b>vreinterpret_u32_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_s64" type="checkbox"><label for="vreinterpret_p8_s64"><div>poly8x8_t <b><b>vreinterpret_p8_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_s64" type="checkbox"><label for="vreinterpret_p16_s64"><div>poly16x4_t <b><b>vreinterpret_p16_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_s64" type="checkbox"><label for="vreinterpret_u64_s64"><div>uint64x1_t <b><b>vreinterpret_u64_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_s64" type="checkbox"><label for="vreinterpret_f64_s64"><div>float64x1_t <b><b>vreinterpret_f64_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_p64" type="checkbox"><label for="vreinterpret_u64_p64"><div>uint64x1_t <b><b>vreinterpret_u64_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_s64" type="checkbox"><label for="vreinterpret_f16_s64"><div>float16x4_t <b><b>vreinterpret_f16_s64</b></b> (int64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_f16" type="checkbox"><label for="vreinterpret_s8_f16"><div>int8x8_t <b><b>vreinterpret_s8_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_f16" type="checkbox"><label for="vreinterpret_s16_f16"><div>int16x4_t <b><b>vreinterpret_s16_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_f16" type="checkbox"><label for="vreinterpret_s32_f16"><div>int32x2_t <b><b>vreinterpret_s32_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_f16" type="checkbox"><label for="vreinterpret_f32_f16"><div>float32x2_t <b><b>vreinterpret_f32_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_f16" type="checkbox"><label for="vreinterpret_u8_f16"><div>uint8x8_t <b><b>vreinterpret_u8_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_f16" type="checkbox"><label for="vreinterpret_u16_f16"><div>uint16x4_t <b><b>vreinterpret_u16_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_f16" type="checkbox"><label for="vreinterpret_u32_f16"><div>uint32x2_t <b><b>vreinterpret_u32_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_f16" type="checkbox"><label for="vreinterpret_p8_f16"><div>poly8x8_t <b><b>vreinterpret_p8_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_f16" type="checkbox"><label for="vreinterpret_p16_f16"><div>poly16x4_t <b><b>vreinterpret_p16_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_f16" type="checkbox"><label for="vreinterpret_u64_f16"><div>uint64x1_t <b><b>vreinterpret_u64_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_f16" type="checkbox"><label for="vreinterpret_s64_f16"><div>int64x1_t <b><b>vreinterpret_s64_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_f16" type="checkbox"><label for="vreinterpret_f64_f16"><div>float64x1_t <b><b>vreinterpret_f64_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p64_f16" type="checkbox"><label for="vreinterpret_p64_f16"><div>poly64x1_t <b><b>vreinterpret_p64_f16</b></b> (float16x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4H </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_s8" type="checkbox"><label for="vreinterpretq_s16_s8"><div>int16x8_t <b><b>vreinterpretq_s16_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_s8" type="checkbox"><label for="vreinterpretq_s32_s8"><div>int32x4_t <b><b>vreinterpretq_s32_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_s8" type="checkbox"><label for="vreinterpretq_f32_s8"><div>float32x4_t <b><b>vreinterpretq_f32_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_s8" type="checkbox"><label for="vreinterpretq_u8_s8"><div>uint8x16_t <b><b>vreinterpretq_u8_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_s8" type="checkbox"><label for="vreinterpretq_u16_s8"><div>uint16x8_t <b><b>vreinterpretq_u16_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_s8" type="checkbox"><label for="vreinterpretq_u32_s8"><div>uint32x4_t <b><b>vreinterpretq_u32_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_s8" type="checkbox"><label for="vreinterpretq_p8_s8"><div>poly8x16_t <b><b>vreinterpretq_p8_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_s8" type="checkbox"><label for="vreinterpretq_p16_s8"><div>poly16x8_t <b><b>vreinterpretq_p16_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_s8" type="checkbox"><label for="vreinterpretq_u64_s8"><div>uint64x2_t <b><b>vreinterpretq_u64_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_s8" type="checkbox"><label for="vreinterpretq_s64_s8"><div>int64x2_t <b><b>vreinterpretq_s64_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_s8" type="checkbox"><label for="vreinterpretq_f64_s8"><div>float64x2_t <b><b>vreinterpretq_f64_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_s8" type="checkbox"><label for="vreinterpretq_p64_s8"><div>poly64x2_t <b><b>vreinterpretq_p64_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_s8" type="checkbox"><label for="vreinterpretq_p128_s8"><div>poly128_t <b><b>vreinterpretq_p128_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_s8" type="checkbox"><label for="vreinterpretq_f16_s8"><div>float16x8_t <b><b>vreinterpretq_f16_s8</b></b> (int8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_s16" type="checkbox"><label for="vreinterpretq_s8_s16"><div>int8x16_t <b><b>vreinterpretq_s8_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_s16" type="checkbox"><label for="vreinterpretq_s32_s16"><div>int32x4_t <b><b>vreinterpretq_s32_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_s16" type="checkbox"><label for="vreinterpretq_f32_s16"><div>float32x4_t <b><b>vreinterpretq_f32_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_s16" type="checkbox"><label for="vreinterpretq_u8_s16"><div>uint8x16_t <b><b>vreinterpretq_u8_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_s16" type="checkbox"><label for="vreinterpretq_u16_s16"><div>uint16x8_t <b><b>vreinterpretq_u16_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_s16" type="checkbox"><label for="vreinterpretq_u32_s16"><div>uint32x4_t <b><b>vreinterpretq_u32_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_s16" type="checkbox"><label for="vreinterpretq_p8_s16"><div>poly8x16_t <b><b>vreinterpretq_p8_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_s16" type="checkbox"><label for="vreinterpretq_p16_s16"><div>poly16x8_t <b><b>vreinterpretq_p16_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_s16" type="checkbox"><label for="vreinterpretq_u64_s16"><div>uint64x2_t <b><b>vreinterpretq_u64_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_s16" type="checkbox"><label for="vreinterpretq_s64_s16"><div>int64x2_t <b><b>vreinterpretq_s64_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_s16" type="checkbox"><label for="vreinterpretq_f64_s16"><div>float64x2_t <b><b>vreinterpretq_f64_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_s16" type="checkbox"><label for="vreinterpretq_p64_s16"><div>poly64x2_t <b><b>vreinterpretq_p64_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_s16" type="checkbox"><label for="vreinterpretq_p128_s16"><div>poly128_t <b><b>vreinterpretq_p128_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_s16" type="checkbox"><label for="vreinterpretq_f16_s16"><div>float16x8_t <b><b>vreinterpretq_f16_s16</b></b> (int16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_s32" type="checkbox"><label for="vreinterpretq_s8_s32"><div>int8x16_t <b><b>vreinterpretq_s8_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_s32" type="checkbox"><label for="vreinterpretq_s16_s32"><div>int16x8_t <b><b>vreinterpretq_s16_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_s32" type="checkbox"><label for="vreinterpretq_f32_s32"><div>float32x4_t <b><b>vreinterpretq_f32_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_s32" type="checkbox"><label for="vreinterpretq_u8_s32"><div>uint8x16_t <b><b>vreinterpretq_u8_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_s32" type="checkbox"><label for="vreinterpretq_u16_s32"><div>uint16x8_t <b><b>vreinterpretq_u16_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_s32" type="checkbox"><label for="vreinterpretq_u32_s32"><div>uint32x4_t <b><b>vreinterpretq_u32_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_s32" type="checkbox"><label for="vreinterpretq_p8_s32"><div>poly8x16_t <b><b>vreinterpretq_p8_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_s32" type="checkbox"><label for="vreinterpretq_p16_s32"><div>poly16x8_t <b><b>vreinterpretq_p16_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_s32" type="checkbox"><label for="vreinterpretq_u64_s32"><div>uint64x2_t <b><b>vreinterpretq_u64_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_s32" type="checkbox"><label for="vreinterpretq_s64_s32"><div>int64x2_t <b><b>vreinterpretq_s64_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_s32" type="checkbox"><label for="vreinterpretq_f64_s32"><div>float64x2_t <b><b>vreinterpretq_f64_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_s32" type="checkbox"><label for="vreinterpretq_p64_s32"><div>poly64x2_t <b><b>vreinterpretq_p64_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_s32" type="checkbox"><label for="vreinterpretq_p128_s32"><div>poly128_t <b><b>vreinterpretq_p128_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_s32" type="checkbox"><label for="vreinterpretq_f16_s32"><div>float16x8_t <b><b>vreinterpretq_f16_s32</b></b> (int32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_f32" type="checkbox"><label for="vreinterpretq_s8_f32"><div>int8x16_t <b><b>vreinterpretq_s8_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_f32" type="checkbox"><label for="vreinterpretq_s16_f32"><div>int16x8_t <b><b>vreinterpretq_s16_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_f32" type="checkbox"><label for="vreinterpretq_s32_f32"><div>int32x4_t <b><b>vreinterpretq_s32_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_f32" type="checkbox"><label for="vreinterpretq_u8_f32"><div>uint8x16_t <b><b>vreinterpretq_u8_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_f32" type="checkbox"><label for="vreinterpretq_u16_f32"><div>uint16x8_t <b><b>vreinterpretq_u16_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_f32" type="checkbox"><label for="vreinterpretq_u32_f32"><div>uint32x4_t <b><b>vreinterpretq_u32_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_f32" type="checkbox"><label for="vreinterpretq_p8_f32"><div>poly8x16_t <b><b>vreinterpretq_p8_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_f32" type="checkbox"><label for="vreinterpretq_p16_f32"><div>poly16x8_t <b><b>vreinterpretq_p16_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_f32" type="checkbox"><label for="vreinterpretq_u64_f32"><div>uint64x2_t <b><b>vreinterpretq_u64_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_f32" type="checkbox"><label for="vreinterpretq_s64_f32"><div>int64x2_t <b><b>vreinterpretq_s64_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_f32" type="checkbox"><label for="vreinterpretq_f64_f32"><div>float64x2_t <b><b>vreinterpretq_f64_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_f32" type="checkbox"><label for="vreinterpretq_p64_f32"><div>poly64x2_t <b><b>vreinterpretq_p64_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_f32" type="checkbox"><label for="vreinterpretq_p128_f32"><div>poly128_t <b><b>vreinterpretq_p128_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_f64" type="checkbox"><label for="vreinterpretq_p64_f64"><div>poly64x2_t <b><b>vreinterpretq_p64_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_f64" type="checkbox"><label for="vreinterpretq_p128_f64"><div>poly128_t <b><b>vreinterpretq_p128_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_f32" type="checkbox"><label for="vreinterpretq_f16_f32"><div>float16x8_t <b><b>vreinterpretq_f16_f32</b></b> (float32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_u8" type="checkbox"><label for="vreinterpretq_s8_u8"><div>int8x16_t <b><b>vreinterpretq_s8_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_u8" type="checkbox"><label for="vreinterpretq_s16_u8"><div>int16x8_t <b><b>vreinterpretq_s16_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_u8" type="checkbox"><label for="vreinterpretq_s32_u8"><div>int32x4_t <b><b>vreinterpretq_s32_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_u8" type="checkbox"><label for="vreinterpretq_f32_u8"><div>float32x4_t <b><b>vreinterpretq_f32_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_u8" type="checkbox"><label for="vreinterpretq_u16_u8"><div>uint16x8_t <b><b>vreinterpretq_u16_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_u8" type="checkbox"><label for="vreinterpretq_u32_u8"><div>uint32x4_t <b><b>vreinterpretq_u32_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_u8" type="checkbox"><label for="vreinterpretq_p8_u8"><div>poly8x16_t <b><b>vreinterpretq_p8_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_u8" type="checkbox"><label for="vreinterpretq_p16_u8"><div>poly16x8_t <b><b>vreinterpretq_p16_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_u8" type="checkbox"><label for="vreinterpretq_u64_u8"><div>uint64x2_t <b><b>vreinterpretq_u64_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_u8" type="checkbox"><label for="vreinterpretq_s64_u8"><div>int64x2_t <b><b>vreinterpretq_s64_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_u8" type="checkbox"><label for="vreinterpretq_f64_u8"><div>float64x2_t <b><b>vreinterpretq_f64_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_u8" type="checkbox"><label for="vreinterpretq_p64_u8"><div>poly64x2_t <b><b>vreinterpretq_p64_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_u8" type="checkbox"><label for="vreinterpretq_p128_u8"><div>poly128_t <b><b>vreinterpretq_p128_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_u8" type="checkbox"><label for="vreinterpretq_f16_u8"><div>float16x8_t <b><b>vreinterpretq_f16_u8</b></b> (uint8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_u16" type="checkbox"><label for="vreinterpretq_s8_u16"><div>int8x16_t <b><b>vreinterpretq_s8_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_u16" type="checkbox"><label for="vreinterpretq_s16_u16"><div>int16x8_t <b><b>vreinterpretq_s16_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_u16" type="checkbox"><label for="vreinterpretq_s32_u16"><div>int32x4_t <b><b>vreinterpretq_s32_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_u16" type="checkbox"><label for="vreinterpretq_f32_u16"><div>float32x4_t <b><b>vreinterpretq_f32_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_u16" type="checkbox"><label for="vreinterpretq_u8_u16"><div>uint8x16_t <b><b>vreinterpretq_u8_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_u16" type="checkbox"><label for="vreinterpretq_u32_u16"><div>uint32x4_t <b><b>vreinterpretq_u32_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_u16" type="checkbox"><label for="vreinterpretq_p8_u16"><div>poly8x16_t <b><b>vreinterpretq_p8_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_u16" type="checkbox"><label for="vreinterpretq_p16_u16"><div>poly16x8_t <b><b>vreinterpretq_p16_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_u16" type="checkbox"><label for="vreinterpretq_u64_u16"><div>uint64x2_t <b><b>vreinterpretq_u64_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_u16" type="checkbox"><label for="vreinterpretq_s64_u16"><div>int64x2_t <b><b>vreinterpretq_s64_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_u16" type="checkbox"><label for="vreinterpretq_f64_u16"><div>float64x2_t <b><b>vreinterpretq_f64_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_u16" type="checkbox"><label for="vreinterpretq_p64_u16"><div>poly64x2_t <b><b>vreinterpretq_p64_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_u16" type="checkbox"><label for="vreinterpretq_p128_u16"><div>poly128_t <b><b>vreinterpretq_p128_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_u16" type="checkbox"><label for="vreinterpretq_f16_u16"><div>float16x8_t <b><b>vreinterpretq_f16_u16</b></b> (uint16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_u32" type="checkbox"><label for="vreinterpretq_s8_u32"><div>int8x16_t <b><b>vreinterpretq_s8_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_u32" type="checkbox"><label for="vreinterpretq_s16_u32"><div>int16x8_t <b><b>vreinterpretq_s16_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_u32" type="checkbox"><label for="vreinterpretq_s32_u32"><div>int32x4_t <b><b>vreinterpretq_s32_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_u32" type="checkbox"><label for="vreinterpretq_f32_u32"><div>float32x4_t <b><b>vreinterpretq_f32_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_u32" type="checkbox"><label for="vreinterpretq_u8_u32"><div>uint8x16_t <b><b>vreinterpretq_u8_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_u32" type="checkbox"><label for="vreinterpretq_u16_u32"><div>uint16x8_t <b><b>vreinterpretq_u16_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_u32" type="checkbox"><label for="vreinterpretq_p8_u32"><div>poly8x16_t <b><b>vreinterpretq_p8_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_u32" type="checkbox"><label for="vreinterpretq_p16_u32"><div>poly16x8_t <b><b>vreinterpretq_p16_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_u32" type="checkbox"><label for="vreinterpretq_u64_u32"><div>uint64x2_t <b><b>vreinterpretq_u64_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_u32" type="checkbox"><label for="vreinterpretq_s64_u32"><div>int64x2_t <b><b>vreinterpretq_s64_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_u32" type="checkbox"><label for="vreinterpretq_f64_u32"><div>float64x2_t <b><b>vreinterpretq_f64_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_u32" type="checkbox"><label for="vreinterpretq_p64_u32"><div>poly64x2_t <b><b>vreinterpretq_p64_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_u32" type="checkbox"><label for="vreinterpretq_p128_u32"><div>poly128_t <b><b>vreinterpretq_p128_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_u32" type="checkbox"><label for="vreinterpretq_f16_u32"><div>float16x8_t <b><b>vreinterpretq_f16_u32</b></b> (uint32x4_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.4S </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_p8" type="checkbox"><label for="vreinterpretq_s8_p8"><div>int8x16_t <b><b>vreinterpretq_s8_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_p8" type="checkbox"><label for="vreinterpretq_s16_p8"><div>int16x8_t <b><b>vreinterpretq_s16_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_p8" type="checkbox"><label for="vreinterpretq_s32_p8"><div>int32x4_t <b><b>vreinterpretq_s32_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_p8" type="checkbox"><label for="vreinterpretq_f32_p8"><div>float32x4_t <b><b>vreinterpretq_f32_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_p8" type="checkbox"><label for="vreinterpretq_u8_p8"><div>uint8x16_t <b><b>vreinterpretq_u8_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_p8" type="checkbox"><label for="vreinterpretq_u16_p8"><div>uint16x8_t <b><b>vreinterpretq_u16_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_p8" type="checkbox"><label for="vreinterpretq_u32_p8"><div>uint32x4_t <b><b>vreinterpretq_u32_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_p8" type="checkbox"><label for="vreinterpretq_p16_p8"><div>poly16x8_t <b><b>vreinterpretq_p16_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_p8" type="checkbox"><label for="vreinterpretq_u64_p8"><div>uint64x2_t <b><b>vreinterpretq_u64_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_p8" type="checkbox"><label for="vreinterpretq_s64_p8"><div>int64x2_t <b><b>vreinterpretq_s64_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_p8" type="checkbox"><label for="vreinterpretq_f64_p8"><div>float64x2_t <b><b>vreinterpretq_f64_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_p8" type="checkbox"><label for="vreinterpretq_p64_p8"><div>poly64x2_t <b><b>vreinterpretq_p64_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_p8" type="checkbox"><label for="vreinterpretq_p128_p8"><div>poly128_t <b><b>vreinterpretq_p128_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_p8" type="checkbox"><label for="vreinterpretq_f16_p8"><div>float16x8_t <b><b>vreinterpretq_f16_p8</b></b> (poly8x16_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.16B </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_p16" type="checkbox"><label for="vreinterpretq_s8_p16"><div>int8x16_t <b><b>vreinterpretq_s8_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_p16" type="checkbox"><label for="vreinterpretq_s16_p16"><div>int16x8_t <b><b>vreinterpretq_s16_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_p16" type="checkbox"><label for="vreinterpretq_s32_p16"><div>int32x4_t <b><b>vreinterpretq_s32_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_p16" type="checkbox"><label for="vreinterpretq_f32_p16"><div>float32x4_t <b><b>vreinterpretq_f32_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_p16" type="checkbox"><label for="vreinterpretq_u8_p16"><div>uint8x16_t <b><b>vreinterpretq_u8_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_p16" type="checkbox"><label for="vreinterpretq_u16_p16"><div>uint16x8_t <b><b>vreinterpretq_u16_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_p16" type="checkbox"><label for="vreinterpretq_u32_p16"><div>uint32x4_t <b><b>vreinterpretq_u32_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_p16" type="checkbox"><label for="vreinterpretq_p8_p16"><div>poly8x16_t <b><b>vreinterpretq_p8_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_p16" type="checkbox"><label for="vreinterpretq_u64_p16"><div>uint64x2_t <b><b>vreinterpretq_u64_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_p16" type="checkbox"><label for="vreinterpretq_s64_p16"><div>int64x2_t <b><b>vreinterpretq_s64_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_p16" type="checkbox"><label for="vreinterpretq_f64_p16"><div>float64x2_t <b><b>vreinterpretq_f64_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_p16" type="checkbox"><label for="vreinterpretq_p64_p16"><div>poly64x2_t <b><b>vreinterpretq_p64_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_p16" type="checkbox"><label for="vreinterpretq_p128_p16"><div>poly128_t <b><b>vreinterpretq_p128_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_p16" type="checkbox"><label for="vreinterpretq_f16_p16"><div>float16x8_t <b><b>vreinterpretq_f16_p16</b></b> (poly16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_u64" type="checkbox"><label for="vreinterpretq_s8_u64"><div>int8x16_t <b><b>vreinterpretq_s8_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_u64" type="checkbox"><label for="vreinterpretq_s16_u64"><div>int16x8_t <b><b>vreinterpretq_s16_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_u64" type="checkbox"><label for="vreinterpretq_s32_u64"><div>int32x4_t <b><b>vreinterpretq_s32_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_u64" type="checkbox"><label for="vreinterpretq_f32_u64"><div>float32x4_t <b><b>vreinterpretq_f32_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_u64" type="checkbox"><label for="vreinterpretq_u8_u64"><div>uint8x16_t <b><b>vreinterpretq_u8_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_u64" type="checkbox"><label for="vreinterpretq_u16_u64"><div>uint16x8_t <b><b>vreinterpretq_u16_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_u64" type="checkbox"><label for="vreinterpretq_u32_u64"><div>uint32x4_t <b><b>vreinterpretq_u32_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_u64" type="checkbox"><label for="vreinterpretq_p8_u64"><div>poly8x16_t <b><b>vreinterpretq_p8_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_u64" type="checkbox"><label for="vreinterpretq_p16_u64"><div>poly16x8_t <b><b>vreinterpretq_p16_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_u64" type="checkbox"><label for="vreinterpretq_s64_u64"><div>int64x2_t <b><b>vreinterpretq_s64_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_u64" type="checkbox"><label for="vreinterpretq_f64_u64"><div>float64x2_t <b><b>vreinterpretq_f64_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_s64" type="checkbox"><label for="vreinterpretq_f64_s64"><div>float64x2_t <b><b>vreinterpretq_f64_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_s64" type="checkbox"><label for="vreinterpretq_p64_s64"><div>poly64x2_t <b><b>vreinterpretq_p64_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_s64" type="checkbox"><label for="vreinterpretq_p128_s64"><div>poly128_t <b><b>vreinterpretq_p128_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_u64" type="checkbox"><label for="vreinterpretq_p64_u64"><div>poly64x2_t <b><b>vreinterpretq_p64_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_u64" type="checkbox"><label for="vreinterpretq_p128_u64"><div>poly128_t <b><b>vreinterpretq_p128_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_u64" type="checkbox"><label for="vreinterpretq_f16_u64"><div>float16x8_t <b><b>vreinterpretq_f16_u64</b></b> (uint64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_s64" type="checkbox"><label for="vreinterpretq_s8_s64"><div>int8x16_t <b><b>vreinterpretq_s8_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_s64" type="checkbox"><label for="vreinterpretq_s16_s64"><div>int16x8_t <b><b>vreinterpretq_s16_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_s64" type="checkbox"><label for="vreinterpretq_s32_s64"><div>int32x4_t <b><b>vreinterpretq_s32_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_s64" type="checkbox"><label for="vreinterpretq_f32_s64"><div>float32x4_t <b><b>vreinterpretq_f32_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_s64" type="checkbox"><label for="vreinterpretq_u8_s64"><div>uint8x16_t <b><b>vreinterpretq_u8_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_s64" type="checkbox"><label for="vreinterpretq_u16_s64"><div>uint16x8_t <b><b>vreinterpretq_u16_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_s64" type="checkbox"><label for="vreinterpretq_u32_s64"><div>uint32x4_t <b><b>vreinterpretq_u32_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_s64" type="checkbox"><label for="vreinterpretq_p8_s64"><div>poly8x16_t <b><b>vreinterpretq_p8_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_s64" type="checkbox"><label for="vreinterpretq_p16_s64"><div>poly16x8_t <b><b>vreinterpretq_p16_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_s64" type="checkbox"><label for="vreinterpretq_u64_s64"><div>uint64x2_t <b><b>vreinterpretq_u64_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_p64" type="checkbox"><label for="vreinterpretq_u64_p64"><div>uint64x2_t <b><b>vreinterpretq_u64_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_s64" type="checkbox"><label for="vreinterpretq_f16_s64"><div>float16x8_t <b><b>vreinterpretq_f16_s64</b></b> (int64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_f16" type="checkbox"><label for="vreinterpretq_s8_f16"><div>int8x16_t <b><b>vreinterpretq_s8_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_f16" type="checkbox"><label for="vreinterpretq_s16_f16"><div>int16x8_t <b><b>vreinterpretq_s16_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_f16" type="checkbox"><label for="vreinterpretq_s32_f16"><div>int32x4_t <b><b>vreinterpretq_s32_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_f16" type="checkbox"><label for="vreinterpretq_f32_f16"><div>float32x4_t <b><b>vreinterpretq_f32_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_f16" type="checkbox"><label for="vreinterpretq_u8_f16"><div>uint8x16_t <b><b>vreinterpretq_u8_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_f16" type="checkbox"><label for="vreinterpretq_u16_f16"><div>uint16x8_t <b><b>vreinterpretq_u16_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_f16" type="checkbox"><label for="vreinterpretq_u32_f16"><div>uint32x4_t <b><b>vreinterpretq_u32_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_f16" type="checkbox"><label for="vreinterpretq_p8_f16"><div>poly8x16_t <b><b>vreinterpretq_p8_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_f16" type="checkbox"><label for="vreinterpretq_p16_f16"><div>poly16x8_t <b><b>vreinterpretq_p16_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_f16" type="checkbox"><label for="vreinterpretq_u64_f16"><div>uint64x2_t <b><b>vreinterpretq_u64_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_f16" type="checkbox"><label for="vreinterpretq_s64_f16"><div>int64x2_t <b><b>vreinterpretq_s64_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>v7/A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_f16" type="checkbox"><label for="vreinterpretq_f64_f16"><div>float64x2_t <b><b>vreinterpretq_f64_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p64_f16" type="checkbox"><label for="vreinterpretq_p64_f16"><div>poly64x2_t <b><b>vreinterpretq_p64_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p128_f16" type="checkbox"><label for="vreinterpretq_p128_f16"><div>poly128_t <b><b>vreinterpretq_p128_f16</b></b> (float16x8_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.8H </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_f64" type="checkbox"><label for="vreinterpret_s8_f64"><div>int8x8_t <b><b>vreinterpret_s8_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_f64" type="checkbox"><label for="vreinterpret_s16_f64"><div>int16x4_t <b><b>vreinterpret_s16_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_f64" type="checkbox"><label for="vreinterpret_s32_f64"><div>int32x2_t <b><b>vreinterpret_s32_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_f64" type="checkbox"><label for="vreinterpret_u8_f64"><div>uint8x8_t <b><b>vreinterpret_u8_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_f64" type="checkbox"><label for="vreinterpret_u16_f64"><div>uint16x4_t <b><b>vreinterpret_u16_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_f64" type="checkbox"><label for="vreinterpret_u32_f64"><div>uint32x2_t <b><b>vreinterpret_u32_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_f64" type="checkbox"><label for="vreinterpret_p8_f64"><div>poly8x8_t <b><b>vreinterpret_p8_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_f64" type="checkbox"><label for="vreinterpret_p16_f64"><div>poly16x4_t <b><b>vreinterpret_p16_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_f64" type="checkbox"><label for="vreinterpret_u64_f64"><div>uint64x1_t <b><b>vreinterpret_u64_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_f64" type="checkbox"><label for="vreinterpret_s64_f64"><div>int64x1_t <b><b>vreinterpret_s64_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_f64" type="checkbox"><label for="vreinterpret_f16_f64"><div>float16x4_t <b><b>vreinterpret_f16_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f32_f64" type="checkbox"><label for="vreinterpret_f32_f64"><div>float32x2_t <b><b>vreinterpret_f32_f64</b></b> (float64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_f64" type="checkbox"><label for="vreinterpretq_s8_f64"><div>int8x16_t <b><b>vreinterpretq_s8_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_f64" type="checkbox"><label for="vreinterpretq_s16_f64"><div>int16x8_t <b><b>vreinterpretq_s16_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_f64" type="checkbox"><label for="vreinterpretq_s32_f64"><div>int32x4_t <b><b>vreinterpretq_s32_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_f64" type="checkbox"><label for="vreinterpretq_u8_f64"><div>uint8x16_t <b><b>vreinterpretq_u8_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_f64" type="checkbox"><label for="vreinterpretq_u16_f64"><div>uint16x8_t <b><b>vreinterpretq_u16_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_f64" type="checkbox"><label for="vreinterpretq_u32_f64"><div>uint32x4_t <b><b>vreinterpretq_u32_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_f64" type="checkbox"><label for="vreinterpretq_p8_f64"><div>poly8x16_t <b><b>vreinterpretq_p8_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_f64" type="checkbox"><label for="vreinterpretq_p16_f64"><div>poly16x8_t <b><b>vreinterpretq_p16_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_f64" type="checkbox"><label for="vreinterpretq_u64_f64"><div>uint64x2_t <b><b>vreinterpretq_u64_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_f64" type="checkbox"><label for="vreinterpretq_s64_f64"><div>int64x2_t <b><b>vreinterpretq_s64_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_f64" type="checkbox"><label for="vreinterpretq_f16_f64"><div>float16x8_t <b><b>vreinterpretq_f16_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f32_f64" type="checkbox"><label for="vreinterpretq_f32_f64"><div>float32x4_t <b><b>vreinterpretq_f32_f64</b></b> (float64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s8_p64" type="checkbox"><label for="vreinterpret_s8_p64"><div>int8x8_t <b><b>vreinterpret_s8_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s16_p64" type="checkbox"><label for="vreinterpret_s16_p64"><div>int16x4_t <b><b>vreinterpret_s16_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s32_p64" type="checkbox"><label for="vreinterpret_s32_p64"><div>int32x2_t <b><b>vreinterpret_s32_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u8_p64" type="checkbox"><label for="vreinterpret_u8_p64"><div>uint8x8_t <b><b>vreinterpret_u8_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u16_p64" type="checkbox"><label for="vreinterpret_u16_p64"><div>uint16x4_t <b><b>vreinterpret_u16_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u32_p64" type="checkbox"><label for="vreinterpret_u32_p64"><div>uint32x2_t <b><b>vreinterpret_u32_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.2S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p8_p64" type="checkbox"><label for="vreinterpret_p8_p64"><div>poly8x8_t <b><b>vreinterpret_p8_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.8B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_p16_p64" type="checkbox"><label for="vreinterpret_p16_p64"><div>poly16x4_t <b><b>vreinterpret_p16_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_u64_p64" type="checkbox"><label for="vreinterpret_u64_p64"><div>uint64x1_t <b><b>vreinterpret_u64_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_s64_p64" type="checkbox"><label for="vreinterpret_s64_p64"><div>int64x1_t <b><b>vreinterpret_s64_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f64_p64" type="checkbox"><label for="vreinterpret_f64_p64"><div>float64x1_t <b><b>vreinterpret_f64_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.1D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpret_f16_p64" type="checkbox"><label for="vreinterpret_f16_p64"><div>float16x4_t <b><b>vreinterpret_f16_p64</b></b> (poly64x1_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1D </pre>      <h4>Results</h4>      <pre>Vd.4H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_p64" type="checkbox"><label for="vreinterpretq_s8_p64"><div>int8x16_t <b><b>vreinterpretq_s8_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_p64" type="checkbox"><label for="vreinterpretq_s16_p64"><div>int16x8_t <b><b>vreinterpretq_s16_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_p64" type="checkbox"><label for="vreinterpretq_s32_p64"><div>int32x4_t <b><b>vreinterpretq_s32_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_p64" type="checkbox"><label for="vreinterpretq_u8_p64"><div>uint8x16_t <b><b>vreinterpretq_u8_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_p64" type="checkbox"><label for="vreinterpretq_u16_p64"><div>uint16x8_t <b><b>vreinterpretq_u16_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_p64" type="checkbox"><label for="vreinterpretq_u32_p64"><div>uint32x4_t <b><b>vreinterpretq_u32_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_p64" type="checkbox"><label for="vreinterpretq_p8_p64"><div>poly8x16_t <b><b>vreinterpretq_p8_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_p64" type="checkbox"><label for="vreinterpretq_p16_p64"><div>poly16x8_t <b><b>vreinterpretq_p16_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_p64" type="checkbox"><label for="vreinterpretq_u64_p64"><div>uint64x2_t <b><b>vreinterpretq_u64_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_p64" type="checkbox"><label for="vreinterpretq_s64_p64"><div>int64x2_t <b><b>vreinterpretq_s64_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_p64" type="checkbox"><label for="vreinterpretq_f64_p64"><div>float64x2_t <b><b>vreinterpretq_f64_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_p64" type="checkbox"><label for="vreinterpretq_f16_p64"><div>float16x8_t <b><b>vreinterpretq_f16_p64</b></b> (poly64x2_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.2D </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s8_p128" type="checkbox"><label for="vreinterpretq_s8_p128"><div>int8x16_t <b><b>vreinterpretq_s8_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s16_p128" type="checkbox"><label for="vreinterpretq_s16_p128"><div>int16x8_t <b><b>vreinterpretq_s16_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s32_p128" type="checkbox"><label for="vreinterpretq_s32_p128"><div>int32x4_t <b><b>vreinterpretq_s32_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u8_p128" type="checkbox"><label for="vreinterpretq_u8_p128"><div>uint8x16_t <b><b>vreinterpretq_u8_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u16_p128" type="checkbox"><label for="vreinterpretq_u16_p128"><div>uint16x8_t <b><b>vreinterpretq_u16_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u32_p128" type="checkbox"><label for="vreinterpretq_u32_p128"><div>uint32x4_t <b><b>vreinterpretq_u32_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p8_p128" type="checkbox"><label for="vreinterpretq_p8_p128"><div>poly8x16_t <b><b>vreinterpretq_p8_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_p16_p128" type="checkbox"><label for="vreinterpretq_p16_p128"><div>poly16x8_t <b><b>vreinterpretq_p16_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_u64_p128" type="checkbox"><label for="vreinterpretq_u64_p128"><div>uint64x2_t <b><b>vreinterpretq_u64_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_s64_p128" type="checkbox"><label for="vreinterpretq_s64_p128"><div>int64x2_t <b><b>vreinterpretq_s64_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f64_p128" type="checkbox"><label for="vreinterpretq_f64_p128"><div>float64x2_t <b><b>vreinterpretq_f64_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.2D &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A64</p> </article>  </div><div class="intrinsic"><input id="vreinterpretq_f16_p128" type="checkbox"><label for="vreinterpretq_f16_p128"><div>float16x8_t <b><b>vreinterpretq_f16_p128</b></b> (poly128_t a)<span class="right">Vector reinterpret cast operation</span></div></label><article>      <h4>Description</h4><p></p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/nop-no-operation">NOP</a> 
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vd.1Q </pre>      <h4>Results</h4>      <pre>Vd.8H &rarr; result
+</pre>       <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vldrq_p128" type="checkbox"><label for="vldrq_p128"><div>poly128_t <b><b>vldrq_p128</b></b> (poly128_t const * ptr)<span class="right">Load SIMD&amp;FP register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Load SIMD&amp;FP Register (register offset). This instruction loads a SIMD&amp;FP register from memory. The address that is used for the load is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/ldr-register-simdfp-load-simdfp-register-register-offset">LDR</a> Qd,[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn </pre>      <h4>Results</h4>      <pre>Qd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(64) offset = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.ExtendReg.3" title="function: bits(N) ExtendReg(integer reg, ExtendType type, integer shift)">ExtendReg</a>(m, extend_type, shift);
+if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    boolean is_load_store = memop IN {<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_STORE" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_STORE</a>, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a>};
+    SetNotTagCheckedInstruction(is_load_store &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(64) address;
+bits(datasize) data;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+address = address + offset;
+
+case memop of
+    when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_STORE" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_STORE</a>
+        data = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address, datasize DIV 8, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = data;
+
+    when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a>
+        data = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address, datasize DIV 8, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = data;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vstrq_p128" type="checkbox"><label for="vstrq_p128"><div>void <b><b>vstrq_p128</b></b> (poly128_t * ptr, poly128_t val)<span class="right">Store SIMD&amp;FP register</span></div></label><article>      <h4>Description</h4><p><p class="aml">Store SIMD&amp;FP register (register offset). This instruction stores a single SIMD&amp;FP register to memory. The address that is used for the store is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/str-register-simdfp-store-simdfp-register-register-offset">STR</a> Qt,[Xn]
+</pre>      <h4>Argument Preparation</h4><pre>ptr &rarr; Xn <br />
+val &rarr; Qt </pre>      <h4>Results</h4>      <pre>void &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(64) offset = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#impl-aarch64.ExtendReg.3" title="function: bits(N) ExtendReg(integer reg, ExtendType type, integer shift)">ExtendReg</a>(m, extend_type, shift);
+if <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.HaveMTEExt.0" title="function: boolean HaveMTEExt()">HaveMTEExt</a>() then
+    boolean is_load_store = memop IN {<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_STORE" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_STORE</a>, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a>};
+    SetNotTagCheckedInstruction(is_load_store &amp;&amp; n == 31);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(64) address;
+bits(datasize) data;
+
+if n == 31 then
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.CheckSPAlignment.0" title="function: CheckSPAlignment()">CheckSPAlignment</a>();
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.SP.read.0" title="accessor: bits(width) SP[]">SP</a>[];
+else
+    address = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];
+
+address = address + offset;
+
+case memop of
+    when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_STORE" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_STORE</a>
+        data = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[t];
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.write.3" title="accessor: Mem[bits(64) address, integer size, AccType acctype] = bits(size*8) value">Mem</a>[address, datasize DIV 8, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>] = data;
+
+    when <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-instrs-pseudocode#MemOp_LOAD" title="enumeration MemOp {MemOp_LOAD, MemOp_STORE, MemOp_PREFETCH}">MemOp_LOAD</a>
+        data = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Mem.read.3" title="accessor: bits(size*8) Mem[bits(64) address, integer size, AccType acctype]">Mem</a>[address, datasize DIV 8, <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#AccType_VEC" title="enumeration AccType {AccType_NORMAL, AccType_VEC, AccType_STREAM, AccType_VECSTREAM, AccType_ATOMIC, AccType_ATOMICRW, AccType_ORDERED, AccType_ORDEREDRW, AccType_ORDEREDATOMIC, AccType_ORDEREDATOMICRW,
+ AccType_LIMITEDORDERED, AccType_UNPRIV, AccType_IFETCH, AccType_PTW, AccType_NONFAULT, AccType_CNOTFIRST, AccType_NV2REGISTER,   AccType_DC, AccType_DC_UNPRIV, AccType_IC, AccType_DCZVA, AccType_AT}">AccType_VEC</a>];
+        <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[t] = data;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaeseq_u8" type="checkbox"><label for="vaeseq_u8"><div>uint8x16_t <b><b>vaeseq_u8</b></b> (uint8x16_t data, uint8x16_t key)<span class="right">AES single round encryption</span></div></label><article>      <h4>Description</h4><p><p class="aml">AES single round encryption.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/aese-aes-single-round-encryption">AESE</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>data &rarr; Vd.16B <br />
+key &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(128) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) result;
+result = operand1 EOR operand2;
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.AESSubBytes.1" title="function: bits(128) AESSubBytes(bits(128) op)">AESSubBytes</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.AESShiftRows.1" title="function: bits(128) AESShiftRows(bits(128) op)">AESShiftRows</a>(result));
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaesdq_u8" type="checkbox"><label for="vaesdq_u8"><div>uint8x16_t <b><b>vaesdq_u8</b></b> (uint8x16_t data, uint8x16_t key)<span class="right">AES single round decryption</span></div></label><article>      <h4>Description</h4><p><p class="aml">AES single round decryption.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/aesd-aes-single-round-decryption">AESD</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>data &rarr; Vd.16B <br />
+key &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(128) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) result;
+result = operand1 EOR operand2;
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.AESInvSubBytes.1" title="function: bits(128) AESInvSubBytes(bits(128) op)">AESInvSubBytes</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.AESInvShiftRows.1" title="function: bits(128) AESInvShiftRows(bits(128) op)">AESInvShiftRows</a>(result));
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaesmcq_u8" type="checkbox"><label for="vaesmcq_u8"><div>uint8x16_t <b><b>vaesmcq_u8</b></b> (uint8x16_t data)<span class="right">AES mix columns</span></div></label><article>      <h4>Description</h4><p><p class="aml">AES mix columns.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/aesmc-aes-mix-columns">AESMC</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>data &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) result;
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.AESMixColumns.1" title="function: bits(128) AESMixColumns(bits (128) op)">AESMixColumns</a>(operand);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vaesimcq_u8" type="checkbox"><label for="vaesimcq_u8"><div>uint8x16_t <b><b>vaesimcq_u8</b></b> (uint8x16_t data)<span class="right">AES inverse mix columns</span></div></label><article>      <h4>Description</h4><p><p class="aml">AES inverse mix columns.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/aesimc-aes-inverse-mix-columns">AESIMC</a> Vd.16B,Vn.16B
+</pre>      <h4>Argument Preparation</h4><pre>data &rarr; Vn.16B </pre>      <h4>Results</h4>      <pre>Vd.16B &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) result;
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.AESInvMixColumns.1" title="function: bits(128) AESInvMixColumns(bits (128) op)">AESInvMixColumns</a>(operand);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha1cq_u32" type="checkbox"><label for="vsha1cq_u32"><div>uint32x4_t <b><b>vsha1cq_u32</b></b> (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)<span class="right">SHA1 hash update (choose)</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA1 hash update (choose).</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha1c-sha1-hash-update-choose">SHA1C</a> Qd,Sn,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>hash_abcd &rarr; Qd <br />
+hash_e &rarr; Sn <br />
+wk &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Qd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) X = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(32) Y = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];    // Note: 32 not 128 bits wide
+bits(128) W = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SHAchoose.3" title="function: bits(32) SHAchoose(bits(32) x, bits(32) y, bits(32) z)">SHAchoose</a>(X&lt;63:32&gt;, X&lt;95:64&gt;, X&lt;127:96&gt;);
+    Y = Y + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(X&lt;31:0&gt;, 5) + t + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[W, e, 32];
+    X&lt;63:32&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(X&lt;63:32&gt;, 30);
+    &lt;Y, X&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(Y:X, 32);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = X;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha1pq_u32" type="checkbox"><label for="vsha1pq_u32"><div>uint32x4_t <b><b>vsha1pq_u32</b></b> (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)<span class="right">SHA1 hash update (parity)</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA1 hash update (parity).</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha1p-sha1-hash-update-parity">SHA1P</a> Qd,Sn,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>hash_abcd &rarr; Qd <br />
+hash_e &rarr; Sn <br />
+wk &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Qd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) X = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(32) Y = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];    // Note: 32 not 128 bits wide
+bits(128) W = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SHAparity.3" title="function: bits(32) SHAparity(bits(32) x, bits(32) y, bits(32) z)">SHAparity</a>(X&lt;63:32&gt;, X&lt;95:64&gt;, X&lt;127:96&gt;);
+    Y = Y + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(X&lt;31:0&gt;, 5) + t + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[W, e, 32];
+    X&lt;63:32&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(X&lt;63:32&gt;, 30);
+    &lt;Y, X&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(Y:X, 32);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = X;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha1mq_u32" type="checkbox"><label for="vsha1mq_u32"><div>uint32x4_t <b><b>vsha1mq_u32</b></b> (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)<span class="right">SHA1 hash update (majority)</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA1 hash update (majority).</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha1m-sha1-hash-update-majority">SHA1M</a> Qd,Sn,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>hash_abcd &rarr; Qd <br />
+hash_e &rarr; Sn <br />
+wk &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Qd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) X = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(32) Y = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];    // Note: 32 not 128 bits wide
+bits(128) W = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SHAmajority.3" title="function: bits(32) SHAmajority(bits(32) x, bits(32) y, bits(32) z)">SHAmajority</a>(X&lt;63:32&gt;, X&lt;95:64&gt;, X&lt;127:96&gt;);
+    Y = Y + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(X&lt;31:0&gt;, 5) + t + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[W, e, 32];
+    X&lt;63:32&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(X&lt;63:32&gt;, 30);
+    &lt;Y, X&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(Y:X, 32);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = X;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha1h_u32" type="checkbox"><label for="vsha1h_u32"><div>uint32_t <b><b>vsha1h_u32</b></b> (uint32_t hash_e)<span class="right">SHA1 fixed rotate</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA1 fixed rotate.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha1h-sha1-fixed-rotate">SHA1H</a> Sd,Sn
+</pre>      <h4>Argument Preparation</h4><pre>hash_e &rarr; Sn </pre>      <h4>Results</h4>      <pre>Sd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(32) operand = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];    // read element [0] only,  [1-3] zeroed
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(operand, 30);</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha1su0q_u32" type="checkbox"><label for="vsha1su0q_u32"><div>uint32x4_t <b><b>vsha1su0q_u32</b></b> (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)<span class="right">SHA1 schedule update 0</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA1 schedule update 0.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha1su0-sha1-schedule-update-0">SHA1SU0</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>w0_3 &rarr; Vd.4S <br />
+w4_7 &rarr; Vn.4S <br />
+w8_11 &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(128) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128) result;
+
+result = operand2&lt;63:0&gt;:operand1&lt;127:64&gt;;
+result = result EOR operand1 EOR operand3;
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha1su1q_u32" type="checkbox"><label for="vsha1su1q_u32"><div>uint32x4_t <b><b>vsha1su1q_u32</b></b> (uint32x4_t tw0_3, uint32x4_t w12_15)<span class="right">SHA1 schedule update 1</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA1 schedule update 1.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha1su1-sha1-schedule-update-1">SHA1SU1</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>tw0_3 &rarr; Vd.4S <br />
+w12_15 &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(128) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) result;
+bits(128) T = operand1 EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(operand2, 32);
+result&lt;31:0&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(T&lt;31:0&gt;, 1);
+result&lt;63:32&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(T&lt;63:32&gt;, 1);
+result&lt;95:64&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(T&lt;95:64&gt;, 1);
+result&lt;127:96&gt; = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(T&lt;127:96&gt;, 1) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROL.2" title="function: bits(N) ROL(bits(N) x, integer shift)">ROL</a>(T&lt;31:0&gt;, 2);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha256hq_u32" type="checkbox"><label for="vsha256hq_u32"><div>uint32x4_t <b><b>vsha256hq_u32</b></b> (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)<span class="right">SHA256 hash update (part 1)</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA256 hash update (part 1).</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha256h-sha256-hash-update-part-1">SHA256H</a> Qd,Qn,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>hash_abcd &rarr; Qd <br />
+hash_efgh &rarr; Qn <br />
+wk &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Qd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) result;
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SHA256hash.4" title="function: bits(128) SHA256hash(bits (128) X, bits(128) Y, bits(128) W, boolean part1)">SHA256hash</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d], <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n], <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m], TRUE);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha256h2q_u32" type="checkbox"><label for="vsha256h2q_u32"><div>uint32x4_t <b><b>vsha256h2q_u32</b></b> (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)<span class="right">SHA256 hash update (part 2)</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA256 hash update (part 2).</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha256h2-sha256-hash-update-part-2">SHA256H2</a> Qd,Qn,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>hash_efgh &rarr; Qd <br />
+hash_abcd &rarr; Qn <br />
+wk &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Qd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) result;
+result = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.SHA256hash.4" title="function: bits(128) SHA256hash(bits (128) X, bits(128) Y, bits(128) W, boolean part1)">SHA256hash</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n], <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d], <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m], FALSE);
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha256su0q_u32" type="checkbox"><label for="vsha256su0q_u32"><div>uint32x4_t <b><b>vsha256su0q_u32</b></b> (uint32x4_t w0_3, uint32x4_t w4_7)<span class="right">SHA256 schedule update 0</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA256 schedule update 0.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha256su0-sha256-schedule-update-0">SHA256SU0</a> Vd.4S,Vn.4S
+</pre>      <h4>Argument Preparation</h4><pre>w0_3 &rarr; Vd.4S <br />
+w4_7 &rarr; Vn.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(128) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) result;
+bits(128) T = operand2&lt;31:0&gt;:operand1&lt;127:32&gt;;
+bits(32) elt;
+
+for e = 0 to 3
+    elt = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[T, e, 32];
+    elt = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROR.2" title="function: bits(N) ROR(bits(N) x, integer shift)">ROR</a>(elt, 7) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROR.2" title="function: bits(N) ROR(bits(N) x, integer shift)">ROR</a>(elt, 18) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(elt, 3);
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = elt + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 32];
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vsha256su1q_u32" type="checkbox"><label for="vsha256su1q_u32"><div>uint32x4_t <b><b>vsha256su1q_u32</b></b> (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)<span class="right">SHA256 schedule update 1</span></div></label><article>      <h4>Description</h4><p><p class="aml">SHA256 schedule update 1.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/sha256su1-sha256-schedule-update-1">SHA256SU1</a> Vd.4S,Vn.4S,Vm.4S
+</pre>      <h4>Argument Preparation</h4><pre>tw0_3 &rarr; Vd.4S <br />
+w8_11 &rarr; Vn.4S <br />
+w12_15 &rarr; Vm.4S </pre>      <h4>Results</h4>      <pre>Vd.4S &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#AArch64.CheckFPAdvSIMDEnabled.0" title="function: AArch64.CheckFPAdvSIMDEnabled()">AArch64.CheckFPAdvSIMDEnabled</a>();
+
+bits(128) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[d];
+bits(128) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[n];
+bits(128) operand3 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.read.1" title="accessor: bits(width) V[integer n]">V</a>[m];
+bits(128) result;
+bits(128) T0 = operand3&lt;31:0&gt;:operand2&lt;127:32&gt;;
+bits(64) T1;
+bits(32) elt;
+
+T1 = operand3&lt;127:64&gt;;
+for e = 0 to 1
+    elt = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[T1, e, 32];
+    elt = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROR.2" title="function: bits(N) ROR(bits(N) x, integer shift)">ROR</a>(elt, 17) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROR.2" title="function: bits(N) ROR(bits(N) x, integer shift)">ROR</a>(elt, 19) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(elt, 10);
+    elt = elt + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 32] + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[T0, e, 32];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = elt;
+
+T1 = result&lt;63:0&gt;;
+for e = 2 to 3
+    elt = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[T1, e-2, 32];
+    elt = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROR.2" title="function: bits(N) ROR(bits(N) x, integer shift)">ROR</a>(elt, 17) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.ROR.2" title="function: bits(N) ROR(bits(N) x, integer shift)">ROR</a>(elt, 19) EOR <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.LSR.2" title="function: bits(N) LSR(bits(N) x, integer shift)">LSR</a>(elt, 10);
+    elt = elt + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, 32] + <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[T0, e, 32];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = elt;
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_p64" type="checkbox"><label for="vmull_p64"><div>poly128_t <b><b>vmull_p64</b></b> (poly64_t a, poly64_t b)<span class="right">Polynomial multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/pmull-pmull2-polynomial-multiply-long">PMULL</a> Vd.1Q,Vn.1D,Vm.1D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.1D <br />
+b &rarr; Vm.1D </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="vmull_high_p64" type="checkbox"><label for="vmull_high_p64"><div>poly128_t <b><b>vmull_high_p64</b></b> (poly64x2_t a, poly64x2_t b)<span class="right">Polynomial multiply long</span></div></label><article>      <h4>Description</h4><p><p class="aml">Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&amp;FP registers, places the results in a vector, and writes the vector to the destination SIMD&amp;FP register. The destination vector elements are twice as long as the elements that are multiplied.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/pmull-pmull2-polynomial-multiply-long">PMULL2</a> Vd.1Q,Vn.2D,Vm.2D
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Vn.2D <br />
+b &rarr; Vm.2D </pre>      <h4>Results</h4>      <pre>Vd.1Q &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock"><a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-exceptions-pseudocode#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
+bits(datasize) operand1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[n, part];
+bits(datasize) operand2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.Vpart.read.2" title="accessor: bits(width) Vpart[integer n, integer part]">Vpart</a>[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, e, esize];
+    element2 = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, e, esize];
+    <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 2*esize] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.PolynomialMult.2" title="function: bits(M+N) PolynomialMult(bits(M) op1, bits(N) op2)">PolynomialMult</a>(element1, element2);
+
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-simd-and-floating-point-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.V.write.1" title="accessor: V[integer n] = bits(width) value">V</a>[d] = result;</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32b" type="checkbox"><label for="__crc32b"><div>uint32_t <b><b>__crc32b</b></b> (uint32_t a, uint8_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32b-crc32h-crc32w-crc32x-crc32-checksum">CRC32B</a> Wd,Wn,Wm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Wm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x04C11DB7&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32h" type="checkbox"><label for="__crc32h"><div>uint32_t <b><b>__crc32h</b></b> (uint32_t a, uint16_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32b-crc32h-crc32w-crc32x-crc32-checksum">CRC32H</a> Wd,Wn,Wm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Wm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x04C11DB7&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32w" type="checkbox"><label for="__crc32w"><div>uint32_t <b><b>__crc32w</b></b> (uint32_t a, uint32_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32b-crc32h-crc32w-crc32x-crc32-checksum">CRC32W</a> Wd,Wn,Wm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Wm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x04C11DB7&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32d" type="checkbox"><label for="__crc32d"><div>uint32_t <b><b>__crc32d</b></b> (uint32_t a, uint64_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32b-crc32h-crc32w-crc32x-crc32-checksum">CRC32X</a> Wd,Wn,Xm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Xm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x04C11DB7&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32cb" type="checkbox"><label for="__crc32cb"><div>uint32_t <b><b>__crc32cb</b></b> (uint32_t a, uint8_t b)<span class="right"><span class="asm-code">CRC32C checksum</span></span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32C</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32cb-crc32ch-crc32cw-crc32cx-crc32c-checksum">CRC32CB</a> Wd,Wn,Wm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Wm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x1EDC6F41&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32ch" type="checkbox"><label for="__crc32ch"><div>uint32_t <b><b>__crc32ch</b></b> (uint32_t a, uint16_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32cb-crc32ch-crc32cw-crc32cx-crc32c-checksum">CRC32CH</a> Wd,Wn,Wm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Wm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x1EDC6F41&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32cw" type="checkbox"><label for="__crc32cw"><div>uint32_t <b><b>__crc32cw</b></b> (uint32_t a, uint32_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32cb-crc32ch-crc32cw-crc32cx-crc32c-checksum">CRC32CW</a> Wd,Wn,Wm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Wm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x1EDC6F41&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div><div class="intrinsic"><input id="__crc32cd" type="checkbox"><label for="__crc32cd"><div>uint32_t <b><b>__crc32cd</b></b> (uint32_t a, uint64_t b)<span class="right"><span class="asm-code">CRC32 checksum</span></div></label><article>      <h4>Description</h4><p><p class="aml"><span class="asm-code">CRC32</span> checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.</p>
+</p>      <h4>A64 Instruction</h4><pre><a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/crc32cb-crc32ch-crc32cw-crc32cx-crc32c-checksum">CRC32CX</a> Wd,Wn,Xm
+</pre>      <h4>Argument Preparation</h4><pre>a &rarr; Wn <br />
+b &rarr; Xm </pre>      <h4>Results</h4>      <pre>Wd &rarr; result
+</pre>  <h4>Operation</h4>
+<pre class="codeblock">bits(32) acc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[n];    // accumulator
+bits(size) val = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.read.1" title="accessor: bits(width) X[integer n]">X</a>[m];    // input value
+bits(32) poly = 0x1EDC6F41&lt;31:0&gt;;
+
+bits(32+size) tempacc = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(acc):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(size);
+bits(size+32) tempval = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(val):<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/aarch64-functions-pseudocode#impl-aarch64.X.write.1" title="accessor: X[integer n] = bits(width) value">X</a>[d] = <a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.BitReverse.1" title="function: bits(N) BitReverse(bits(N) data)">BitReverse</a>(<a href="https://developer.arm.com/docs/ddi0596/a/a64-base-instructions-alphabetic-order/../a64-shared-pseudocode-functions/shared-functions-pseudocode#impl-shared.Poly32Mod2.2" title="function: bits(32) Poly32Mod2(bits(N) data, bits(32) poly)">Poly32Mod2</a>(tempacc EOR tempval, poly));</pre>
+      <h4>Supported architectures</h4>      <p>A32/A64</p> </article>  </div>
+</section>
+            
+        </div>
+    </div>    
+</div>
+
+</div>
+<!-- END ProductItemContent -->
+
+</main>
+
+
+<footer class="c-footer u-no-print" id="footer">
+
+    <!-- START Newsletter -->
+    <div class="c-footer__newsletter">
+        <div class="row">
+            <div class="columns">
+                <!-- START NewsLetterHorizontal -->
+<div class="c-widget c-newsletter-widget is-horizontal">
+    
+    
+    <!-- Newsletter form -->
+    <div class="row">
+        <div class="large-4 columns">
+            <h4 class="c-newsletter-widget__title">Stay Informed</h4>
+            <span class="c-newsletter-widget__description">Sign up for news and updates.</span>
+        </div>
+        <div class="large-8 columns">
+            <div class="row">
+                <form class="js-newsletter__form" data-abide>
+                    <div class="large-3 columns">
+                        <label for="news_sign_first_id" class="hide">First Name</label>
+                        <input type="text" name="First Name" id="news_sign_first_id" class="input" placeholder="First Name" required />
+                        <small class="error">Please enter your first name.</small>
+                    </div>
+                    <div class="large-3 columns">
+                        <label for="news_sign_last_id" class="hide">Last Name</label>
+                        <input type="text" name="Last Name" id="news_sign_last_id" class="input" placeholder="Last Name" required />
+                        <small class="error">Please enter your last name.</small>
+                    </div>
+                    <div class="large-4 columns">
+                        <label for="news_sign_email_address_id" class="hide">E-Mail</label>
+                        <input type="email" name="Email Address" id="news_sign_email_address_id" class="input" placeholder="E-Mail" required />
+                        <small class="error">Please enter your e-mail.</small>
+                    </div>
+                    <div class="large-2 columns end">
+                        <input type="submit" value="Sign up" class="c-button" style="margin-bottom: 0; margin-top: 0;" />
+                    </div>
+                </form>
+            </div>
+        </div>
+    </div>
+    <!-- END newsletter form -->
+
+    <!-- START newsletter modal window -->
+    <div class="reveal-modal medium" id="newsletterModal" data-reveal aria-hidden="true" role="dialog">
+        <div class="modal-hider">
+            <iframe width="100%" height="100%" data-src="/forms/newsletter-signup"></iframe>
+        </div>
+        <p>
+            <a class="close-reveal-modal" aria-label="Close">&#215;</a>
+        </p>
+    </div>
+    <!-- END newsletter modal window -->
+
+</div>
+<!-- End NewsLetterHorizontal -->
+
+            </div>
+        </div>
+    </div>
+    <!-- END Newsletter -->
+
+    <!-- START Internal Footer -->
+    <div class="c-footer__internal">
+        <div class="row small-text-center large-text-left">
+            <div class="large-3 columns spacing-3 pushing-3">
+                <!-- START Footer Section -->
+            <h3 class="c-footer-section__title">
+Arm Developer            </h3>
+            <div class="row">
+                <div class="columns">
+                    <ul class="o-list c-footer-section__list">
+                            <li><a class="c-footer-section__link" href="/embedded" title="Embedded Software Developers">Embedded Software Developers</a></li>
+    <li><a class="c-footer-section__link" href="/open-source" title="Linux and Open Source">Linux and Open Source</a></li>
+    <li><a class="c-footer-section__link" href="https://www.arm.com/resources/education" title="Education">Education</a></li>
+    <li><a class="c-footer-section__link" href="https://www.arm.com/resources/research" title="Research">Research</a></li>
+    <li><a class="c-footer-section__link" href="/graphics" title="Graphics and Multimedia Development">Graphics and Multimedia Development</a></li>
+    <li><a class="c-footer-section__link" href="/soc" title="SoC Design">SoC Design</a></li>
+    <li><a class="c-footer-section__link" href="/hpc" title="High Performance Computing">High Performance Computing</a></li>
+
+
+                    </ul>
+                </div>
+            </div>
+            <h3 class="c-footer-section__title">
+                    <a href="/products/architecture" title="Architecture" class="home">Architecture</a>
+            </h3>
+            <div class="row">
+                <div class="columns">
+                    <ul class="o-list c-footer-section__list">
+                        
+        <li><a class="c-footer-section__link" href="/products/architecture/cpu-architecture" title="CPU Architecture">CPU Architecture</a></li>
+        <li><a class="c-footer-section__link" href="/products/architecture/system-architectures" title="System Architectures">System Architectures</a></li>
+        <li><a class="c-footer-section__link" href="/products/architecture/security-architectures" title="Security Architectures">Security Architectures</a></li>
+        <li><a class="c-footer-section__link" href="/products/architecture/instruction-sets" title="Instruction Sets">Instruction Sets</a></li>
+        <li><a class="c-footer-section__link" href="/products/architecture/platform-design" title="Platform Design">Platform Design</a></li>
+        <li><a class="c-footer-section__link" href="/products/architecture/reference-library" title="Reference Library">Reference Library</a></li>
+
+                    </ul>
+                </div>
+            </div>
+<!-- END Footer Section -->
+
+            </div>
+            <div class="large-offset-1 large-8 columns spacing-3 pushing-3">
+                <!-- START Internal Right -->
+
+
+            <h3 class="c-footer-section__title">
+                    <a href="/products" title="IP Products" class="home">IP Products</a>
+            </h3>
+<div class="row">                <div class="large-4 medium-6 columns left">
+                    <ul class="o-list c-footer-section__list">
+                        <li>
+
+                            <a href="/products/processors" title="Processors">
+                                    <strong class="c-footer-section__subtitle">Processors</strong>
+                            </a>
+                        </li>
+                        
+        <li><a class="c-footer-section__link" href="/products/processors/cortex-a" title="Cortex-A">Cortex-A</a></li>
+        <li><a class="c-footer-section__link" href="/products/processors/cortex-r" title="Cortex-R">Cortex-R</a></li>
+        <li><a class="c-footer-section__link" href="/products/processors/cortex-m" title="Cortex-M">Cortex-M</a></li>
+        <li><a class="c-footer-section__link" href="/products/processors/classic-processors" title="Classic Processors">Classic Processors</a></li>
+        <li><a class="c-footer-section__link" href="/products/processors/machine-learning" title="Machine Learning">Machine Learning</a></li>
+
+                    </ul>
+                </div>
+                <div class="large-4 medium-6 columns left">
+                    <ul class="o-list c-footer-section__list">
+                        <li>
+
+                            <a href="/products/physical-ip" title="Physical IP">
+                                    <strong class="c-footer-section__subtitle">Physical IP</strong>
+                            </a>
+                        </li>
+                        
+        <li><a class="c-footer-section__link" href="/products/physical-ip/logic-ip" title="Logic IP">Logic IP</a></li>
+        <li><a class="c-footer-section__link" href="/products/physical-ip/memory-compilers" title="Memory Compilers">Memory Compilers</a></li>
+        <li><a class="c-footer-section__link" href="/products/physical-ip/interface-ip" title="Interface IP">Interface IP</a></li>
+        <li><a class="c-footer-section__link" href="/products/physical-ip/pop-ip" title="POP IP">POP IP</a></li>
+
+                    </ul>
+                </div>
+                <div class="large-4 medium-6 columns left">
+                    <ul class="o-list c-footer-section__list">
+                        <li>
+
+                            <a href="/products/system-ip" title="System IP">
+                                    <strong class="c-footer-section__subtitle">System IP</strong>
+                            </a>
+                        </li>
+                        
+        <li><a class="c-footer-section__link" href="/products/system-ip/free-system-ip-whitepapers" title="Free System IP Whitepapers">Free System IP Whitepapers</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-ip/corelink-interconnect" title="CoreLink Interconnect">CoreLink Interconnect</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-ip/coresight-debug-and-trace" title="CoreSight Debug and Trace">CoreSight Debug and Trace</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-ip/socrates-system-builder" title="Socrates System Builder">Socrates System Builder</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-ip/memory-controllers" title="CoreLink Memory Controllers">CoreLink Memory Controllers</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-ip/system-controllers" title="CoreLink System Controllers">CoreLink System Controllers</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-ip/security-ip" title="Security IP">Security IP</a></li>
+
+                    </ul>
+                </div>
+</div><div class="row">                <div class="large-4 medium-6 columns left">
+                    <ul class="o-list c-footer-section__list">
+                        <li>
+
+                            <a href="/products/graphics-and-multimedia" title="Graphics and Multimedia Processors">
+                                    <strong class="c-footer-section__subtitle">Graphics and Multimedia Processors</strong>
+                            </a>
+                        </li>
+                        
+        <li><a class="c-footer-section__link" href="/products/graphics-and-multimedia/mali-gpus" title="Mali GPUs">Mali GPUs</a></li>
+        <li><a class="c-footer-section__link" href="/products/graphics-and-multimedia/mali-video-processors" title="Mali Video Processors">Mali Video Processors</a></li>
+        <li><a class="c-footer-section__link" href="/products/graphics-and-multimedia/mali-display-processors" title="Mali Display Processors">Mali Display Processors</a></li>
+        <li><a class="c-footer-section__link" href="/products/graphics-and-multimedia/assertive-display" title="Assertive Display">Assertive Display</a></li>
+
+                    </ul>
+                </div>
+                <div class="large-4 medium-6 columns left">
+                    <ul class="o-list c-footer-section__list">
+                        <li>
+
+                            <a href="/products/system-design" title="System Design Tools">
+                                    <strong class="c-footer-section__subtitle">System Design Tools</strong>
+                            </a>
+                        </li>
+                        
+        <li><a class="c-footer-section__link" href="/products/system-design/subsystems" title="Subsystems">Subsystems</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-design/corstone-foundation-ip" title="Corstone Foundation IP">Corstone Foundation IP</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-design/system-guidance" title="System Guidance">System Guidance</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-design/fast-models" title="Fast Models">Fast Models</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-design/cycle-models" title="Cycle Models">Cycle Models</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-design/development-boards" title="Development Boards">Development Boards</a></li>
+        <li><a class="c-footer-section__link" href="/products/system-design/fixed-virtual-platforms" title="Fixed Virtual Platforms">Fixed Virtual Platforms</a></li>
+
+                    </ul>
+                </div>
+                <div class="large-4 medium-6 columns left">
+                    <ul class="o-list c-footer-section__list">
+                        <li>
+
+                            <a href="/products/software-development-tools" title="Software Tools">
+                                    <strong class="c-footer-section__subtitle">Software Tools</strong>
+                            </a>
+                        </li>
+                        
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/arm-development-studio" title="Arm Development Studio">Arm Development Studio</a></li>
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/graphics-development-tools" title="Graphics Development Tools">Graphics Development Tools</a></li>
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/buy" title="Buy">Buy</a></li>
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/compilers" title="Compilers">Compilers</a></li>
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/hpc" title="HPC">HPC</a></li>
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/keil-mdk" title="Keil MDK">Keil MDK</a></li>
+        <li><a class="c-footer-section__link" href="/products/software-development-tools/debug-probes-and-adapters" title="Debug Probes and Adapters">Debug Probes and Adapters</a></li>
+
+                    </ul>
+                </div>
+</div><!-- END Internal Right -->
+
+            </div>
+        </div>
+    </div>
+    <!-- END Internal Footer -->
+
+    <!-- START External Footer -->
+    <div class="c-footer__external">
+
+        <!-- START External links -->
+        <div class="row small-text-center large-text-left">
+            <div class="large-4 spacing-3 columns">
+                <!-- START Footer Section -->
+            <h3 class="c-footer-section__title">
+Arm Corporate            </h3>
+            <div class="row">
+                <div class="columns">
+                    <ul class="o-list c-footer-section__list">
+                            <li><a class="c-footer-section__link" href="https://www.arm.com/" title="arm.com">arm.com</a></li>
+    <li><a class="c-footer-section__link" href="https://www.arm.com/company" title="Company Profile">Company Profile</a></li>
+    <li><a class="c-footer-section__link" href="https://www.arm.com/company/careers" title="Careers">Careers</a></li>
+    <li><a class="c-footer-section__link" href="https://www.arm.com/company/news" title="Newsroom">Newsroom</a></li>
+    <li><a class="c-footer-section__link" href="https://www.arm.com/company/offices" title="Our Offices">Our Offices</a></li>
+
+
+                    </ul>
+                </div>
+            </div>
+<!-- END Footer Section -->
+
+            </div>
+            <!-- START External Right -->
+        <div class="large-4 spacing-3 columns">
+                <h3 class="c-footer-section__title">
+More                </h3>
+                <div class="row">
+                    <div class="columns">
+                        <ul class="o-list c-footer-section__list">
+                                <li><a class="c-footer-section__link" href="http://community.arm.com/" title="Arm Community">Arm Community</a></li>
+    <li><a class="c-footer-section__link" href="/support" title="Support">Support</a></li>
+    <li><a class="c-footer-section__link" href="/products/designstart" title="DesignStart">DesignStart</a></li>
+    <li><a class="c-footer-section__link" href="http://www.keil.com/" title="Keil Tools">Keil Tools</a></li>
+    <li><a class="c-footer-section__link" href="/graphics" title="Mali Developer">Mali Developer</a></li>
+
+
+                        </ul>
+                    </div>
+                </div>
+        </div>
+        <div class="large-4 spacing-3 columns">
+                <h3 class="c-footer-section__title">
+Social                </h3>
+                <div class="row">
+                    <div class="columns">
+                        <ul class="o-list c-footer-section__list">
+                                <li><a class="c-footer-section__link" href="https://www.facebook.com/pg/ARM-71946799587/" title="Facebook">Facebook</a></li>
+    <li><a class="c-footer-section__link" href="http://www.linkedin.com/company/Arm" title="LinkedIn">LinkedIn</a></li>
+    <li><a class="c-footer-section__link" href="https://twitter.com/Arm" title="Twitter">Twitter</a></li>
+    <li><a class="c-footer-section__link" href="https://www.youtube.com/user/Armflix" title="YouTube">YouTube</a></li>
+    <li><a class="c-footer-section__link" href="http://i.youku.com/armchina" title="优酷 (YouKu)">优酷 (YouKu)</a></li>
+    <li><a class="c-footer-section__link" href="http://weibo.com/armcn" title="@Arm中国 (Arm Sina)">@Arm中国 (Arm Sina)</a></li>
+
+
+                        </ul>
+                    </div>
+                </div>
+        </div>
+<!-- END External Right -->
+
+        </div>
+        <!-- END External Links -->
+
+        <!-- START Trademark Statement -->
+        <div class="row small-text-center large-text-left">
+            <div class="columns spacing-1">
+                <div class="c-footer__trademark">
+                    AMBA, Arm, Arm7, Arm9, Arm11, Artisan, big.LITTLE, Cordio, CoreLink, CoreSight,
+Cortex, DesignStart, Jazelle, Keil, Mali, Mbed, NEON, POP, SecurCore, Socrates,
+Thumb, TrustZone, ULINK, &#181;Vision, Versatile are trademarks or registered trademarks
+of Arm Limited (or its subsidiaries) in the US and/or elsewhere.
+The related technology may be protected by any or all of patents, copyrights,
+designs and trade secrets. All rights reserved. All other brands or product 
+names are the property of their respective holders. <a href="http://www.arm.com/about/trademarks/">Click here for further details</a>.
+                </div>
+            </div>
+        </div>
+        <!-- END Trademark Statement -->
+
+        <!-- START Legal -->
+        <div class="c-legal row small-text-center large-text-left" role="contentinfo">
+            <div class="large-1 spacing-3 columns">
+                <div class="c-legal__logo"></div>
+            </div>
+            <div class="large-8 spacing-3 columns">
+                    <p class="c-legal__links">
+
+                <a href="https://www.arm.com/company/policies/cookies">Cookie Policy</a>
+ |                <a href="https://www.arm.com/company/policies/terms-and-conditions">Terms of Use</a>
+ |                <a href="https://www.arm.com/company/policies/privacy">Privacy Policy</a>
+ |                <a href="https://www.arm.com/company/policies/accessibility">Accessibility</a>
+ |                <a href="https://login.arm.com/subscriptions.php">Subscription Center</a>
+ |                <a href="https://www.arm.com/company/policies/trademarks">Trademarks</a>
+        <br class="hide-for-large-up" />
+    </p>
+
+            </div>
+            <div class="large-3 spacing-3 columns">
+                <p class="c-legal__copyright">Copyright &#169; 1995-2018 Arm Limited (or its affiliates). All rights reserved. </p>
+            </div>
+        </div>
+        <!-- END Legal -->
+
+    </div>
+    <!-- END External Footer -->
+
+</footer>
+
+
+
+    <div class="c-component c-policies u-no-print" role="contentinfo">
+            <div class="c-component c-policy c-cookie-policy js-policy" data-key="com.arm.accepted.cookie" data-updated="01/02/2018 16:28:25" data-iscookiepolicy="true" title="Cookie Policy" role="alert" style="display: none;">
+        <div class="row">
+            <div class="small-12 large-9 small-text-center large-text-left columns">
+                <p>Important Information for the Arm website. This site uses cookies to store information on your computer. By continuing to use our site, you consent to our cookies. If you are not happy with the use of these cookies, please review our <a class="cookie-link" target="_blank" href="http://www.arm.com/about/cookie_policy.php" title="Cookie Policy">Cookie Policy</a> to learn how they can be disabled. By disabling cookies, some features of the site will not work.</p>
+            </div>
+            <div class="small-12 large-3 text-center columns">
+                <a class="c-button c-policy__accept-button js-accept-policy" tabindex="1" title="Accept and hide this message ">Accept and hide this message  <i class="fa fa-times"></i></a>
+            </div>
+        </div>
+    </div>
+
+        
+    </div>
+
+<script type="text/javascript" src="https://nebula-cdn.kampyle.com/we/8144/onsite/embed.js"></script>
+
+
+    
+
+<script src="/bundles/modernizr?v=inCVuEFe6J4Q07A0AcRsbJic_UE5MwpRMNGcOtk94TE1"></script>
+
+
+
+<script type="text/javascript">
+    if (Modernizr && !Modernizr.svg) {
+        var imgs = document.getElementsByTagName('img');
+        var svgExtension = /.*\.svg$/;
+        var l = imgs.length;
+        for (var i = 0; i < l; i++) {
+            if (imgs[i].src.match(svgExtension)) {
+                imgs[i].src = imgs[i].src.slice(0, -3) + 'png';
+            }
+        }
+    }
+</script>
+
+
+<script src="/shared/vendor/jquery-1.12.4.min.js"></script>
+<script src="/shared/vendor/foundation.min.js"></script>
+<script src="/shared/vendor/moment.min.js"></script>
+<script src="/shared/vendor/js/jquery-rss/src/jquery.rss.js"></script>
+
+<script src="/bundles/clipboard?v=IPc2U7tMxf_2TKh6_qbfzIsYI3pmBbWZxHb5M8V-fhg1"></script>
+
+<script src="/bundles/placeholder?v=Aw-bm4sJPSuBeTzPpRw_GfXYXI4wKmH607vgMic22c01"></script>
+
+<script src="/bundles/waypoints?v=E5Sm2NPVxzLqGyd5lIz-NjBvArn4w7w7IvCs35wz6dA1"></script>
+
+
+
+<script src="/shared/developer.arm.com/js/common.js?v=09142182FF441DC932039AB1D8CD216F"></script>
+<script src="/shared/developer.arm.com/js/app.bundle.js?v=09142182FF441DC932039AB1D8CD216F"></script>
+
+
+<script src="/shared/arm.com-new/js/app.constants.js?v=09142182FF441DC932039AB1D8CD216F"></script>
+<script src="/shared/arm.com-new/js/app.navigation.js?v=09142182FF441DC932039AB1D8CD216F"></script>
+<script type="text/javascript">
+    (function() {
+        var $userMenu = $('.c-user-menu__root');
+        if ($userMenu) {
+            $userMenu.navigation();
+        }
+    })();
+</script>
+
+
+
+<script src="/bundles/jquery-ui?v=atr-jO-t-9RdxuVusckf7yNy0MEEBlVW5TaJCAetR6A1"></script>
+
+<script src="/bundles/jqueryval?v=shBfM8gvrYJt6eNs9xKMaOYfzyGdVGLhvPUMJ92MwmM1"></script>
+
+<script src="/sitecore%20modules/Web/Web%20Forms%20for%20Marketers/mvc/wffm.min.js"></script>
+<script>
+  $(document).ready(function() {
+    $("form[data-wffm]").each(function() { $(this).wffmForm(); });
+  });
+</script>
+
+<link rel="stylesheet" type="text/css" href="//fast.fonts.net/t/1.css?apiType=css&projectid=5616bfa5-8ba9-4061-8e15-3a2d29551ced" />
+
+
+<script src="//munchkin.marketo.net/munchkin.js" type="text/javascript"></script>
+<script type="text/javascript">
+    Munchkin.init('312-SAX-488', {'asyncOnly': true});
+</script>
+
+
+
+    
+    
+    
+</body>
+</html>
diff --git a/library/stdarch/crates/stdarch-verify/build.rs b/library/stdarch/crates/stdarch-verify/build.rs
new file mode 100644
index 000000000..c0dc81b6a
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/build.rs
@@ -0,0 +1,28 @@
+use std::path::Path;
+
+/// Build script: emits `cargo:rerun-if-changed` for the `core_arch` sources listed below.
+fn main() {
+    let root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap();
+    eprintln!("root: {}", root.display());
+    // Keep in sync with the directories scanned by the proc macros in src/lib.rs.
+    for dir in ["x86", "x86_64", "arm", "aarch64", "arm_shared/neon", "mips"].iter() {
+        walk(&root.join("core_arch/src").join(dir));
+    }
+}
+
+/// Recursively prints a `cargo:rerun-if-changed` directive for every `.rs` file under `root`.
+/// Panics if `root`, or any entry found below it, cannot be read.
+fn walk(root: &Path) {
+    // Log the directory once, before reading it, so a failure below is attributable.
+    eprintln!("root: {}", root.display());
+    for entry in root.read_dir().unwrap() {
+        let entry = entry.unwrap();
+        let path = entry.path();
+        if entry.file_type().unwrap().is_dir() {
+            walk(&path);
+        } else if path.extension().and_then(|s| s.to_str()) == Some("rs") {
+            // Directives for Cargo go to stdout; the diagnostics above go to stderr.
+            println!("cargo:rerun-if-changed={}", path.display());
+        }
+    }
+}
diff --git a/library/stdarch/crates/stdarch-verify/mips-msa.h b/library/stdarch/crates/stdarch-verify/mips-msa.h
new file mode 100644
index 000000000..881f1918f
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/mips-msa.h
@@ -0,0 +1,707 @@
+v16i8 __builtin_msa_add_a_b (v16i8, v16i8);
+v8i16 __builtin_msa_add_a_h (v8i16, v8i16);
+v4i32 __builtin_msa_add_a_w (v4i32, v4i32);
+v2i64 __builtin_msa_add_a_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_adds_a_b (v16i8, v16i8);
+v8i16 __builtin_msa_adds_a_h (v8i16, v8i16);
+v4i32 __builtin_msa_adds_a_w (v4i32, v4i32);
+v2i64 __builtin_msa_adds_a_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_adds_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_adds_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_adds_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_adds_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_adds_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_adds_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_adds_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_adds_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_addv_b (v16i8, v16i8);
+v8i16 __builtin_msa_addv_h (v8i16, v8i16);
+v4i32 __builtin_msa_addv_w (v4i32, v4i32);
+v2i64 __builtin_msa_addv_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_addvi_b (v16i8, imm0_31);
+v8i16 __builtin_msa_addvi_h (v8i16, imm0_31);
+v4i32 __builtin_msa_addvi_w (v4i32, imm0_31);
+v2i64 __builtin_msa_addvi_d (v2i64, imm0_31);
+
+v16u8 __builtin_msa_and_v (v16u8, v16u8);
+
+v16u8 __builtin_msa_andi_b (v16u8, imm0_255);
+
+v16i8 __builtin_msa_asub_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_asub_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_asub_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_asub_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_asub_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_asub_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_asub_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_asub_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_ave_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_ave_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_ave_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_ave_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_ave_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_ave_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_ave_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_ave_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_aver_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_aver_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_aver_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_aver_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_aver_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_aver_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_aver_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_aver_u_d (v2u64, v2u64);
+
+v16u8 __builtin_msa_bclr_b (v16u8, v16u8);
+v8u16 __builtin_msa_bclr_h (v8u16, v8u16);
+v4u32 __builtin_msa_bclr_w (v4u32, v4u32);
+v2u64 __builtin_msa_bclr_d (v2u64, v2u64);
+
+v16u8 __builtin_msa_bclri_b (v16u8, imm0_7);
+v8u16 __builtin_msa_bclri_h (v8u16, imm0_15);
+v4u32 __builtin_msa_bclri_w (v4u32, imm0_31);
+v2u64 __builtin_msa_bclri_d (v2u64, imm0_63);
+
+v16u8 __builtin_msa_binsl_b (v16u8, v16u8, v16u8);
+v8u16 __builtin_msa_binsl_h (v8u16, v8u16, v8u16);
+v4u32 __builtin_msa_binsl_w (v4u32, v4u32, v4u32);
+v2u64 __builtin_msa_binsl_d (v2u64, v2u64, v2u64);
+
+v16u8 __builtin_msa_binsli_b (v16u8, v16u8, imm0_7);
+v8u16 __builtin_msa_binsli_h (v8u16, v8u16, imm0_15);
+v4u32 __builtin_msa_binsli_w (v4u32, v4u32, imm0_31);
+v2u64 __builtin_msa_binsli_d (v2u64, v2u64, imm0_63);
+
+v16u8 __builtin_msa_binsr_b (v16u8, v16u8, v16u8);
+v8u16 __builtin_msa_binsr_h (v8u16, v8u16, v8u16);
+v4u32 __builtin_msa_binsr_w (v4u32, v4u32, v4u32);
+v2u64 __builtin_msa_binsr_d (v2u64, v2u64, v2u64);
+
+v16u8 __builtin_msa_binsri_b (v16u8, v16u8, imm0_7);
+v8u16 __builtin_msa_binsri_h (v8u16, v8u16, imm0_15);
+v4u32 __builtin_msa_binsri_w (v4u32, v4u32, imm0_31);
+v2u64 __builtin_msa_binsri_d (v2u64, v2u64, imm0_63);
+
+v16u8 __builtin_msa_bmnz_v (v16u8, v16u8, v16u8);
+
+v16u8 __builtin_msa_bmnzi_b (v16u8, v16u8, imm0_255);
+
+v16u8 __builtin_msa_bmz_v (v16u8, v16u8, v16u8);
+
+v16u8 __builtin_msa_bmzi_b (v16u8, v16u8, imm0_255);
+
+v16u8 __builtin_msa_bneg_b (v16u8, v16u8);
+v8u16 __builtin_msa_bneg_h (v8u16, v8u16);
+v4u32 __builtin_msa_bneg_w (v4u32, v4u32);
+v2u64 __builtin_msa_bneg_d (v2u64, v2u64);
+
+v16u8 __builtin_msa_bnegi_b (v16u8, imm0_7);
+v8u16 __builtin_msa_bnegi_h (v8u16, imm0_15);
+v4u32 __builtin_msa_bnegi_w (v4u32, imm0_31);
+v2u64 __builtin_msa_bnegi_d (v2u64, imm0_63);
+
+i32 __builtin_msa_bnz_b (v16u8);
+i32 __builtin_msa_bnz_h (v8u16);
+i32 __builtin_msa_bnz_w (v4u32);
+i32 __builtin_msa_bnz_d (v2u64);
+
+i32 __builtin_msa_bnz_v (v16u8);
+
+v16u8 __builtin_msa_bsel_v (v16u8, v16u8, v16u8);
+
+v16u8 __builtin_msa_bseli_b (v16u8, v16u8, imm0_255);
+
+v16u8 __builtin_msa_bset_b (v16u8, v16u8);
+v8u16 __builtin_msa_bset_h (v8u16, v8u16);
+v4u32 __builtin_msa_bset_w (v4u32, v4u32);
+v2u64 __builtin_msa_bset_d (v2u64, v2u64);
+
+v16u8 __builtin_msa_bseti_b (v16u8, imm0_7);
+v8u16 __builtin_msa_bseti_h (v8u16, imm0_15);
+v4u32 __builtin_msa_bseti_w (v4u32, imm0_31);
+v2u64 __builtin_msa_bseti_d (v2u64, imm0_63);
+
+i32 __builtin_msa_bz_b (v16u8);
+i32 __builtin_msa_bz_h (v8u16);
+i32 __builtin_msa_bz_w (v4u32);
+i32 __builtin_msa_bz_d (v2u64);
+
+i32 __builtin_msa_bz_v (v16u8);
+
+v16i8 __builtin_msa_ceq_b (v16i8, v16i8);
+v8i16 __builtin_msa_ceq_h (v8i16, v8i16);
+v4i32 __builtin_msa_ceq_w (v4i32, v4i32);
+v2i64 __builtin_msa_ceq_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_ceqi_b (v16i8, imm_n16_15);
+v8i16 __builtin_msa_ceqi_h (v8i16, imm_n16_15);
+v4i32 __builtin_msa_ceqi_w (v4i32, imm_n16_15);
+v2i64 __builtin_msa_ceqi_d (v2i64, imm_n16_15);
+
+i32 __builtin_msa_cfcmsa (imm0_31);
+
+v16i8 __builtin_msa_cle_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_cle_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_cle_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_cle_s_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_cle_u_b (v16u8, v16u8);
+v8i16 __builtin_msa_cle_u_h (v8u16, v8u16);
+v4i32 __builtin_msa_cle_u_w (v4u32, v4u32);
+v2i64 __builtin_msa_cle_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_clei_s_b (v16i8, imm_n16_15);
+v8i16 __builtin_msa_clei_s_h (v8i16, imm_n16_15);
+v4i32 __builtin_msa_clei_s_w (v4i32, imm_n16_15);
+v2i64 __builtin_msa_clei_s_d (v2i64, imm_n16_15);
+
+v16i8 __builtin_msa_clei_u_b (v16u8, imm0_31);
+v8i16 __builtin_msa_clei_u_h (v8u16, imm0_31);
+v4i32 __builtin_msa_clei_u_w (v4u32, imm0_31);
+v2i64 __builtin_msa_clei_u_d (v2u64, imm0_31);
+
+v16i8 __builtin_msa_clt_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_clt_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_clt_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_clt_s_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_clt_u_b (v16u8, v16u8);
+v8i16 __builtin_msa_clt_u_h (v8u16, v8u16);
+v4i32 __builtin_msa_clt_u_w (v4u32, v4u32);
+v2i64 __builtin_msa_clt_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_clti_s_b (v16i8, imm_n16_15);
+v8i16 __builtin_msa_clti_s_h (v8i16, imm_n16_15);
+v4i32 __builtin_msa_clti_s_w (v4i32, imm_n16_15);
+v2i64 __builtin_msa_clti_s_d (v2i64, imm_n16_15);
+
+v16i8 __builtin_msa_clti_u_b (v16u8, imm0_31);
+v8i16 __builtin_msa_clti_u_h (v8u16, imm0_31);
+v4i32 __builtin_msa_clti_u_w (v4u32, imm0_31);
+v2i64 __builtin_msa_clti_u_d (v2u64, imm0_31);
+
+i32 __builtin_msa_copy_s_b (v16i8, imm0_15);
+i32 __builtin_msa_copy_s_h (v8i16, imm0_7);
+i32 __builtin_msa_copy_s_w (v4i32, imm0_3);
+i64 __builtin_msa_copy_s_d (v2i64, imm0_1);
+
+u32 __builtin_msa_copy_u_b (v16i8, imm0_15);
+u32 __builtin_msa_copy_u_h (v8i16, imm0_7);
+u32 __builtin_msa_copy_u_w (v4i32, imm0_3);
+u64 __builtin_msa_copy_u_d (v2i64, imm0_1);
+
+void __builtin_msa_ctcmsa (imm0_31, i32);
+
+v16i8 __builtin_msa_div_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_div_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_div_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_div_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_div_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_div_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_div_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_div_u_d (v2u64, v2u64);
+
+v8i16 __builtin_msa_dotp_s_h (v16i8, v16i8);
+v4i32 __builtin_msa_dotp_s_w (v8i16, v8i16);
+v2i64 __builtin_msa_dotp_s_d (v4i32, v4i32);
+
+v8u16 __builtin_msa_dotp_u_h (v16u8, v16u8);
+v4u32 __builtin_msa_dotp_u_w (v8u16, v8u16);
+v2u64 __builtin_msa_dotp_u_d (v4u32, v4u32);
+
+v8i16 __builtin_msa_dpadd_s_h (v8i16, v16i8, v16i8);
+v4i32 __builtin_msa_dpadd_s_w (v4i32, v8i16, v8i16);
+v2i64 __builtin_msa_dpadd_s_d (v2i64, v4i32, v4i32);
+
+v8u16 __builtin_msa_dpadd_u_h (v8u16, v16u8, v16u8);
+v4u32 __builtin_msa_dpadd_u_w (v4u32, v8u16, v8u16);
+v2u64 __builtin_msa_dpadd_u_d (v2u64, v4u32, v4u32);
+
+v8i16 __builtin_msa_dpsub_s_h (v8i16, v16i8, v16i8);
+v4i32 __builtin_msa_dpsub_s_w (v4i32, v8i16, v8i16);
+v2i64 __builtin_msa_dpsub_s_d (v2i64, v4i32, v4i32);
+
+v8i16 __builtin_msa_dpsub_u_h (v8i16, v16u8, v16u8);
+v4i32 __builtin_msa_dpsub_u_w (v4i32, v8u16, v8u16);
+v2i64 __builtin_msa_dpsub_u_d (v2i64, v4u32, v4u32);
+
+v4f32 __builtin_msa_fadd_w (v4f32, v4f32);
+v2f64 __builtin_msa_fadd_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcaf_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcaf_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fceq_w (v4f32, v4f32);
+v2i64 __builtin_msa_fceq_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fclass_w (v4f32);
+v2i64 __builtin_msa_fclass_d (v2f64);
+
+v4i32 __builtin_msa_fcle_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcle_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fclt_w (v4f32, v4f32);
+v2i64 __builtin_msa_fclt_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcne_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcne_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcor_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcor_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcueq_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcueq_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcule_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcule_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcult_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcult_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcun_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcun_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fcune_w (v4f32, v4f32);
+v2i64 __builtin_msa_fcune_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_fdiv_w (v4f32, v4f32);
+v2f64 __builtin_msa_fdiv_d (v2f64, v2f64);
+
+v8i16 __builtin_msa_fexdo_h (v4f32, v4f32);
+v4f32 __builtin_msa_fexdo_w (v2f64, v2f64);
+
+v4f32 __builtin_msa_fexp2_w (v4f32, v4i32);
+v2f64 __builtin_msa_fexp2_d (v2f64, v2i64);
+
+v4f32 __builtin_msa_fexupl_w (v8i16);
+v2f64 __builtin_msa_fexupl_d (v4f32);
+
+v4f32 __builtin_msa_fexupr_w (v8i16);
+v2f64 __builtin_msa_fexupr_d (v4f32);
+
+v4f32 __builtin_msa_ffint_s_w (v4i32);
+v2f64 __builtin_msa_ffint_s_d (v2i64);
+
+v4f32 __builtin_msa_ffint_u_w (v4u32);
+v2f64 __builtin_msa_ffint_u_d (v2u64);
+
+v4f32 __builtin_msa_ffql_w (v8i16);
+v2f64 __builtin_msa_ffql_d (v4i32);
+
+v4f32 __builtin_msa_ffqr_w (v8i16);
+v2f64 __builtin_msa_ffqr_d (v4i32);
+
+v16i8 __builtin_msa_fill_b (i32);
+v8i16 __builtin_msa_fill_h (i32);
+v4i32 __builtin_msa_fill_w (i32);
+v2i64 __builtin_msa_fill_d (i64);
+
+v4f32 __builtin_msa_flog2_w (v4f32);
+v2f64 __builtin_msa_flog2_d (v2f64);
+
+v4f32 __builtin_msa_fmadd_w (v4f32, v4f32, v4f32);
+v2f64 __builtin_msa_fmadd_d (v2f64, v2f64, v2f64);
+
+v4f32 __builtin_msa_fmax_w (v4f32, v4f32);
+v2f64 __builtin_msa_fmax_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_fmax_a_w (v4f32, v4f32);
+v2f64 __builtin_msa_fmax_a_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_fmin_w (v4f32, v4f32);
+v2f64 __builtin_msa_fmin_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_fmin_a_w (v4f32, v4f32);
+v2f64 __builtin_msa_fmin_a_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_fmsub_w (v4f32, v4f32, v4f32);
+v2f64 __builtin_msa_fmsub_d (v2f64, v2f64, v2f64);
+
+v4f32 __builtin_msa_fmul_w (v4f32, v4f32);
+v2f64 __builtin_msa_fmul_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_frint_w (v4f32);
+v2f64 __builtin_msa_frint_d (v2f64);
+
+v4f32 __builtin_msa_frcp_w (v4f32);
+v2f64 __builtin_msa_frcp_d (v2f64);
+
+v4f32 __builtin_msa_frsqrt_w (v4f32);
+v2f64 __builtin_msa_frsqrt_d (v2f64);
+
+v4i32 __builtin_msa_fsaf_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsaf_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fseq_w (v4f32, v4f32);
+v2i64 __builtin_msa_fseq_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsle_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsle_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fslt_w (v4f32, v4f32);
+v2i64 __builtin_msa_fslt_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsne_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsne_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsor_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsor_d (v2f64, v2f64);
+
+v4f32 __builtin_msa_fsqrt_w (v4f32);
+v2f64 __builtin_msa_fsqrt_d (v2f64);
+
+v4f32 __builtin_msa_fsub_w (v4f32, v4f32);
+v2f64 __builtin_msa_fsub_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsueq_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsueq_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsule_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsule_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsult_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsult_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsun_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsun_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_fsune_w (v4f32, v4f32);
+v2i64 __builtin_msa_fsune_d (v2f64, v2f64);
+
+v4i32 __builtin_msa_ftint_s_w (v4f32);
+v2i64 __builtin_msa_ftint_s_d (v2f64);
+
+v4u32 __builtin_msa_ftint_u_w (v4f32);
+v2u64 __builtin_msa_ftint_u_d (v2f64);
+
+v8i16 __builtin_msa_ftq_h (v4f32, v4f32);
+v4i32 __builtin_msa_ftq_w (v2f64, v2f64);
+
+v4i32 __builtin_msa_ftrunc_s_w (v4f32);
+v2i64 __builtin_msa_ftrunc_s_d (v2f64);
+
+v4u32 __builtin_msa_ftrunc_u_w (v4f32);
+v2u64 __builtin_msa_ftrunc_u_d (v2f64);
+
+v8i16 __builtin_msa_hadd_s_h (v16i8, v16i8);
+v4i32 __builtin_msa_hadd_s_w (v8i16, v8i16);
+v2i64 __builtin_msa_hadd_s_d (v4i32, v4i32);
+
+v8u16 __builtin_msa_hadd_u_h (v16u8, v16u8);
+v4u32 __builtin_msa_hadd_u_w (v8u16, v8u16);
+v2u64 __builtin_msa_hadd_u_d (v4u32, v4u32);
+
+v8i16 __builtin_msa_hsub_s_h (v16i8, v16i8);
+v4i32 __builtin_msa_hsub_s_w (v8i16, v8i16);
+v2i64 __builtin_msa_hsub_s_d (v4i32, v4i32);
+
+v8i16 __builtin_msa_hsub_u_h (v16u8, v16u8);
+v4i32 __builtin_msa_hsub_u_w (v8u16, v8u16);
+v2i64 __builtin_msa_hsub_u_d (v4u32, v4u32);
+
+v16i8 __builtin_msa_ilvev_b (v16i8, v16i8);
+v8i16 __builtin_msa_ilvev_h (v8i16, v8i16);
+v4i32 __builtin_msa_ilvev_w (v4i32, v4i32);
+v2i64 __builtin_msa_ilvev_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_ilvl_b (v16i8, v16i8);
+v8i16 __builtin_msa_ilvl_h (v8i16, v8i16);
+v4i32 __builtin_msa_ilvl_w (v4i32, v4i32);
+v2i64 __builtin_msa_ilvl_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_ilvod_b (v16i8, v16i8);
+v8i16 __builtin_msa_ilvod_h (v8i16, v8i16);
+v4i32 __builtin_msa_ilvod_w (v4i32, v4i32);
+v2i64 __builtin_msa_ilvod_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_ilvr_b (v16i8, v16i8);
+v8i16 __builtin_msa_ilvr_h (v8i16, v8i16);
+v4i32 __builtin_msa_ilvr_w (v4i32, v4i32);
+v2i64 __builtin_msa_ilvr_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_insert_b (v16i8, imm0_15, i32);
+v8i16 __builtin_msa_insert_h (v8i16, imm0_7, i32);
+v4i32 __builtin_msa_insert_w (v4i32, imm0_3, i32);
+v2i64 __builtin_msa_insert_d (v2i64, imm0_1, i64);
+
+v16i8 __builtin_msa_insve_b (v16i8, imm0_15, v16i8);
+v8i16 __builtin_msa_insve_h (v8i16, imm0_7, v8i16);
+v4i32 __builtin_msa_insve_w (v4i32, imm0_3, v4i32);
+v2i64 __builtin_msa_insve_d (v2i64, imm0_1, v2i64);
+
+v16i8 __builtin_msa_ld_b (void *, imm_n512_511);
+v8i16 __builtin_msa_ld_h (void *, imm_n1024_1022);
+v4i32 __builtin_msa_ld_w (void *, imm_n2048_2044);
+v2i64 __builtin_msa_ld_d (void *, imm_n4096_4088);
+
+v16i8 __builtin_msa_ldi_b (imm_n512_511);
+v8i16 __builtin_msa_ldi_h (imm_n512_511);
+v4i32 __builtin_msa_ldi_w (imm_n512_511);
+v2i64 __builtin_msa_ldi_d (imm_n512_511);
+
+v8i16 __builtin_msa_madd_q_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_madd_q_w (v4i32, v4i32, v4i32);
+
+v8i16 __builtin_msa_maddr_q_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_maddr_q_w (v4i32, v4i32, v4i32);
+
+v16i8 __builtin_msa_maddv_b (v16i8, v16i8, v16i8);
+v8i16 __builtin_msa_maddv_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_maddv_w (v4i32, v4i32, v4i32);
+v2i64 __builtin_msa_maddv_d (v2i64, v2i64, v2i64);
+
+v16i8 __builtin_msa_max_a_b (v16i8, v16i8);
+v8i16 __builtin_msa_max_a_h (v8i16, v8i16);
+v4i32 __builtin_msa_max_a_w (v4i32, v4i32);
+v2i64 __builtin_msa_max_a_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_max_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_max_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_max_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_max_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_max_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_max_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_max_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_max_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_maxi_s_b (v16i8, imm_n16_15);
+v8i16 __builtin_msa_maxi_s_h (v8i16, imm_n16_15);
+v4i32 __builtin_msa_maxi_s_w (v4i32, imm_n16_15);
+v2i64 __builtin_msa_maxi_s_d (v2i64, imm_n16_15);
+
+v16u8 __builtin_msa_maxi_u_b (v16u8, imm0_31);
+v8u16 __builtin_msa_maxi_u_h (v8u16, imm0_31);
+v4u32 __builtin_msa_maxi_u_w (v4u32, imm0_31);
+v2u64 __builtin_msa_maxi_u_d (v2u64, imm0_31);
+
+v16i8 __builtin_msa_min_a_b (v16i8, v16i8);
+v8i16 __builtin_msa_min_a_h (v8i16, v8i16);
+v4i32 __builtin_msa_min_a_w (v4i32, v4i32);
+v2i64 __builtin_msa_min_a_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_min_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_min_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_min_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_min_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_min_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_min_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_min_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_min_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_mini_s_b (v16i8, imm_n16_15);
+v8i16 __builtin_msa_mini_s_h (v8i16, imm_n16_15);
+v4i32 __builtin_msa_mini_s_w (v4i32, imm_n16_15);
+v2i64 __builtin_msa_mini_s_d (v2i64, imm_n16_15);
+
+v16u8 __builtin_msa_mini_u_b (v16u8, imm0_31);
+v8u16 __builtin_msa_mini_u_h (v8u16, imm0_31);
+v4u32 __builtin_msa_mini_u_w (v4u32, imm0_31);
+v2u64 __builtin_msa_mini_u_d (v2u64, imm0_31);
+
+v16i8 __builtin_msa_mod_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_mod_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_mod_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_mod_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_mod_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_mod_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_mod_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_mod_u_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_move_v (v16i8);
+
+v8i16 __builtin_msa_msub_q_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_msub_q_w (v4i32, v4i32, v4i32);
+
+v8i16 __builtin_msa_msubr_q_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_msubr_q_w (v4i32, v4i32, v4i32);
+
+v16i8 __builtin_msa_msubv_b (v16i8, v16i8, v16i8);
+v8i16 __builtin_msa_msubv_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_msubv_w (v4i32, v4i32, v4i32);
+v2i64 __builtin_msa_msubv_d (v2i64, v2i64, v2i64);
+
+v8i16 __builtin_msa_mul_q_h (v8i16, v8i16);
+v4i32 __builtin_msa_mul_q_w (v4i32, v4i32);
+
+v8i16 __builtin_msa_mulr_q_h (v8i16, v8i16);
+v4i32 __builtin_msa_mulr_q_w (v4i32, v4i32);
+
+v16i8 __builtin_msa_mulv_b (v16i8, v16i8);
+v8i16 __builtin_msa_mulv_h (v8i16, v8i16);
+v4i32 __builtin_msa_mulv_w (v4i32, v4i32);
+v2i64 __builtin_msa_mulv_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_nloc_b (v16i8);
+v8i16 __builtin_msa_nloc_h (v8i16);
+v4i32 __builtin_msa_nloc_w (v4i32);
+v2i64 __builtin_msa_nloc_d (v2i64);
+
+v16i8 __builtin_msa_nlzc_b (v16i8);
+v8i16 __builtin_msa_nlzc_h (v8i16);
+v4i32 __builtin_msa_nlzc_w (v4i32);
+v2i64 __builtin_msa_nlzc_d (v2i64);
+
+v16u8 __builtin_msa_nor_v (v16u8, v16u8);
+
+v16u8 __builtin_msa_nori_b (v16u8, imm0_255);
+
+v16u8 __builtin_msa_or_v (v16u8, v16u8);
+
+v16u8 __builtin_msa_ori_b (v16u8, imm0_255);
+
+v16i8 __builtin_msa_pckev_b (v16i8, v16i8);
+v8i16 __builtin_msa_pckev_h (v8i16, v8i16);
+v4i32 __builtin_msa_pckev_w (v4i32, v4i32);
+v2i64 __builtin_msa_pckev_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_pckod_b (v16i8, v16i8);
+v8i16 __builtin_msa_pckod_h (v8i16, v8i16);
+v4i32 __builtin_msa_pckod_w (v4i32, v4i32);
+v2i64 __builtin_msa_pckod_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_pcnt_b (v16i8);
+v8i16 __builtin_msa_pcnt_h (v8i16);
+v4i32 __builtin_msa_pcnt_w (v4i32);
+v2i64 __builtin_msa_pcnt_d (v2i64);
+
+v16i8 __builtin_msa_sat_s_b (v16i8, imm0_7);
+v8i16 __builtin_msa_sat_s_h (v8i16, imm0_15);
+v4i32 __builtin_msa_sat_s_w (v4i32, imm0_31);
+v2i64 __builtin_msa_sat_s_d (v2i64, imm0_63);
+
+v16u8 __builtin_msa_sat_u_b (v16u8, imm0_7);
+v8u16 __builtin_msa_sat_u_h (v8u16, imm0_15);
+v4u32 __builtin_msa_sat_u_w (v4u32, imm0_31);
+v2u64 __builtin_msa_sat_u_d (v2u64, imm0_63);
+
+v16i8 __builtin_msa_shf_b (v16i8, imm0_255);
+v8i16 __builtin_msa_shf_h (v8i16, imm0_255);
+v4i32 __builtin_msa_shf_w (v4i32, imm0_255);
+
+v16i8 __builtin_msa_sld_b (v16i8, v16i8, i32);
+v8i16 __builtin_msa_sld_h (v8i16, v8i16, i32);
+v4i32 __builtin_msa_sld_w (v4i32, v4i32, i32);
+v2i64 __builtin_msa_sld_d (v2i64, v2i64, i32);
+
+v16i8 __builtin_msa_sldi_b (v16i8, v16i8, imm0_15);
+v8i16 __builtin_msa_sldi_h (v8i16, v8i16, imm0_7);
+v4i32 __builtin_msa_sldi_w (v4i32, v4i32, imm0_3);
+v2i64 __builtin_msa_sldi_d (v2i64, v2i64, imm0_1);
+
+v16i8 __builtin_msa_sll_b (v16i8, v16i8);
+v8i16 __builtin_msa_sll_h (v8i16, v8i16);
+v4i32 __builtin_msa_sll_w (v4i32, v4i32);
+v2i64 __builtin_msa_sll_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_slli_b (v16i8, imm0_7);
+v8i16 __builtin_msa_slli_h (v8i16, imm0_15);
+v4i32 __builtin_msa_slli_w (v4i32, imm0_31);
+v2i64 __builtin_msa_slli_d (v2i64, imm0_63);
+
+v16i8 __builtin_msa_splat_b (v16i8, i32);
+v8i16 __builtin_msa_splat_h (v8i16, i32);
+v4i32 __builtin_msa_splat_w (v4i32, i32);
+v2i64 __builtin_msa_splat_d (v2i64, i32);
+
+v16i8 __builtin_msa_splati_b (v16i8, imm0_15);
+v8i16 __builtin_msa_splati_h (v8i16, imm0_7);
+v4i32 __builtin_msa_splati_w (v4i32, imm0_3);
+v2i64 __builtin_msa_splati_d (v2i64, imm0_1);
+
+v16i8 __builtin_msa_sra_b (v16i8, v16i8);
+v8i16 __builtin_msa_sra_h (v8i16, v8i16);
+v4i32 __builtin_msa_sra_w (v4i32, v4i32);
+v2i64 __builtin_msa_sra_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_srai_b (v16i8, imm0_7);
+v8i16 __builtin_msa_srai_h (v8i16, imm0_15);
+v4i32 __builtin_msa_srai_w (v4i32, imm0_31);
+v2i64 __builtin_msa_srai_d (v2i64, imm0_63);
+
+v16i8 __builtin_msa_srar_b (v16i8, v16i8);
+v8i16 __builtin_msa_srar_h (v8i16, v8i16);
+v4i32 __builtin_msa_srar_w (v4i32, v4i32);
+v2i64 __builtin_msa_srar_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_srari_b (v16i8, imm0_7);
+v8i16 __builtin_msa_srari_h (v8i16, imm0_15);
+v4i32 __builtin_msa_srari_w (v4i32, imm0_31);
+v2i64 __builtin_msa_srari_d (v2i64, imm0_63);
+
+v16i8 __builtin_msa_srl_b (v16i8, v16i8);
+v8i16 __builtin_msa_srl_h (v8i16, v8i16);
+v4i32 __builtin_msa_srl_w (v4i32, v4i32);
+v2i64 __builtin_msa_srl_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_srli_b (v16i8, imm0_7);
+v8i16 __builtin_msa_srli_h (v8i16, imm0_15);
+v4i32 __builtin_msa_srli_w (v4i32, imm0_31);
+v2i64 __builtin_msa_srli_d (v2i64, imm0_63);
+
+v16i8 __builtin_msa_srlr_b (v16i8, v16i8);
+v8i16 __builtin_msa_srlr_h (v8i16, v8i16);
+v4i32 __builtin_msa_srlr_w (v4i32, v4i32);
+v2i64 __builtin_msa_srlr_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_srlri_b (v16i8, imm0_7);
+v8i16 __builtin_msa_srlri_h (v8i16, imm0_15);
+v4i32 __builtin_msa_srlri_w (v4i32, imm0_31);
+v2i64 __builtin_msa_srlri_d (v2i64, imm0_63);
+
+void __builtin_msa_st_b (v16i8, void *, imm_n512_511);
+void __builtin_msa_st_h (v8i16, void *, imm_n1024_1022);
+void __builtin_msa_st_w (v4i32, void *, imm_n2048_2044);
+void __builtin_msa_st_d (v2i64, void *, imm_n4096_4088);
+
+v16i8 __builtin_msa_subs_s_b (v16i8, v16i8);
+v8i16 __builtin_msa_subs_s_h (v8i16, v8i16);
+v4i32 __builtin_msa_subs_s_w (v4i32, v4i32);
+v2i64 __builtin_msa_subs_s_d (v2i64, v2i64);
+
+v16u8 __builtin_msa_subs_u_b (v16u8, v16u8);
+v8u16 __builtin_msa_subs_u_h (v8u16, v8u16);
+v4u32 __builtin_msa_subs_u_w (v4u32, v4u32);
+v2u64 __builtin_msa_subs_u_d (v2u64, v2u64);
+
+v16u8 __builtin_msa_subsus_u_b (v16u8, v16i8);
+v8u16 __builtin_msa_subsus_u_h (v8u16, v8i16);
+v4u32 __builtin_msa_subsus_u_w (v4u32, v4i32);
+v2u64 __builtin_msa_subsus_u_d (v2u64, v2i64);
+
+v16i8 __builtin_msa_subsuu_s_b (v16u8, v16u8);
+v8i16 __builtin_msa_subsuu_s_h (v8u16, v8u16);
+v4i32 __builtin_msa_subsuu_s_w (v4u32, v4u32);
+v2i64 __builtin_msa_subsuu_s_d (v2u64, v2u64);
+
+v16i8 __builtin_msa_subv_b (v16i8, v16i8);
+v8i16 __builtin_msa_subv_h (v8i16, v8i16);
+v4i32 __builtin_msa_subv_w (v4i32, v4i32);
+v2i64 __builtin_msa_subv_d (v2i64, v2i64);
+
+v16i8 __builtin_msa_subvi_b (v16i8, imm0_31);
+v8i16 __builtin_msa_subvi_h (v8i16, imm0_31);
+v4i32 __builtin_msa_subvi_w (v4i32, imm0_31);
+v2i64 __builtin_msa_subvi_d (v2i64, imm0_31);
+
+v16i8 __builtin_msa_vshf_b (v16i8, v16i8, v16i8);
+v8i16 __builtin_msa_vshf_h (v8i16, v8i16, v8i16);
+v4i32 __builtin_msa_vshf_w (v4i32, v4i32, v4i32);
+v2i64 __builtin_msa_vshf_d (v2i64, v2i64, v2i64);
+
+v16u8 __builtin_msa_xor_v (v16u8, v16u8);
+
+v16u8 __builtin_msa_xori_b (v16u8, imm0_255);
diff --git a/library/stdarch/crates/stdarch-verify/src/lib.rs b/library/stdarch/crates/stdarch-verify/src/lib.rs
new file mode 100644
index 000000000..22108d26a
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/src/lib.rs
@@ -0,0 +1,525 @@
+#![deny(rust_2018_idioms)]
+#[macro_use]
+extern crate quote;
+#[macro_use]
+extern crate syn;
+
+use proc_macro::TokenStream;
+use std::{fs::File, io::Read, path::Path};
+use syn::ext::IdentExt;
+
+/// Proc macro: forwards `input` to `functions` with the x86 and x86_64 source directories.
+#[proc_macro]
+pub fn x86_functions(input: TokenStream) -> TokenStream {
+    functions(input, &["core_arch/src/x86", "core_arch/src/x86_64"])
+}
+
+/// Proc macro: forwards `input` to `functions` with the Arm, AArch64 and shared NEON directories.
+#[proc_macro]
+pub fn arm_functions(input: TokenStream) -> TokenStream {
+    // `functions` joins each entry onto the parent of `CARGO_MANIFEST_DIR`.
+    let arm32 = "core_arch/src/arm";
+    let arm64 = "core_arch/src/aarch64";
+    let shared_neon = "core_arch/src/arm_shared/neon";
+    functions(input, &[arm32, arm64, shared_neon])
+}
+
+/// Proc macro: forwards `input` to `functions` with the MIPS source directory.
+#[proc_macro]
+pub fn mips_functions(input: TokenStream) -> TokenStream {
+    functions(input, &["core_arch/src/mips"])
+}
+
+fn functions(input: TokenStream, dirs: &[&str]) -> TokenStream {
+    let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
+    let root = dir.parent().expect("root-dir not found");
+
+    let mut files = Vec::new();
+    for dir in dirs {
+        walk(&root.join(dir), &mut files);
+    }
+    assert!(!files.is_empty());
+
+    let mut functions = Vec::new();
+    for &mut (ref mut file, ref path) in &mut files {
+        for mut item in file.items.drain(..) {
+            match item {
+                syn::Item::Fn(f) => functions.push((f, path)),
+                syn::Item::Mod(ref mut m) => {
+                    if let Some(ref mut m) = m.content {
+                        for i in m.1.drain(..) {
+                            if let syn::Item::Fn(f) = i {
+                                functions.push((f, path))
+                            }
+                        }
+                    }
+                }
+                _ => (),
+            }
+        }
+    }
+    assert!(!functions.is_empty());
+
+    let mut tests = std::collections::HashSet::<String>::new();
+    for f in &functions {
+        let id = format!("{}", f.0.sig.ident);
+        if id.starts_with("test_") {
+            tests.insert(id);
+        }
+    }
+    assert!(!tests.is_empty());
+
+    functions.retain(|&(ref f, _)| {
+        if let syn::Visibility::Public(_) = f.vis {
+            if f.sig.unsafety.is_some() {
+                return true;
+            }
+        }
+        false
+    });
+    assert!(!functions.is_empty());
+
+    let input = proc_macro2::TokenStream::from(input);
+
+    let functions = functions
+        .iter()
+        .map(|&(ref f, path)| {
+            let name = &f.sig.ident;
+            // println!("{}", name);
+            let mut arguments = Vec::new();
+            let mut const_arguments = Vec::new();
+            for input in f.sig.inputs.iter() {
+                let ty = match *input {
+                    syn::FnArg::Typed(ref c) => &c.ty,
+                    _ => panic!("invalid argument on {}", name),
+                };
+                arguments.push(to_type(ty));
+            }
+            for generic in f.sig.generics.params.iter() {
+                let ty = match *generic {
+                    syn::GenericParam::Const(ref c) => &c.ty,
+                    _ => panic!("invalid generic argument on {}", name),
+                };
+                const_arguments.push(to_type(ty));
+            }
+            let ret = match f.sig.output {
+                syn::ReturnType::Default => quote! { None },
+                syn::ReturnType::Type(_, ref t) => {
+                    let ty = to_type(t);
+                    quote! { Some(#ty) }
+                }
+            };
+            let instrs = find_instrs(&f.attrs);
+            let target_feature = if let Some(i) = find_target_feature(&f.attrs) {
+                quote! { Some(#i) }
+            } else {
+                quote! { None }
+            };
+
+            let required_const = find_required_const("rustc_args_required_const", &f.attrs);
+            let mut legacy_const_generics =
+                find_required_const("rustc_legacy_const_generics", &f.attrs);
+            if !required_const.is_empty() && !legacy_const_generics.is_empty() {
+                panic!(
+                    "Can't have both #[rustc_args_required_const] and \
+                     #[rustc_legacy_const_generics]"
+                );
+            }
+
+            // The list of required consts, used to verify the arguments, comes from either the
+            // `rustc_args_required_const` or the `rustc_legacy_const_generics` attribute.
+            let required_const = if required_const.is_empty() {
+                legacy_const_generics.clone()
+            } else {
+                required_const
+            };
+
+            legacy_const_generics.sort();
+            for (idx, ty) in legacy_const_generics
+                .into_iter()
+                .zip(const_arguments.into_iter())
+            {
+                arguments.insert(idx, ty);
+            }
+
+            // strip leading underscore from fn name when building a test
+            // _mm_foo -> mm_foo such that the test name is test_mm_foo.
+            let test_name_string = format!("{}", name);
+            let mut test_name_id = test_name_string.as_str();
+            while test_name_id.starts_with('_') {
+                test_name_id = &test_name_id[1..];
+            }
+            let has_test = tests.contains(&format!("test_{}", test_name_id));
+
+            quote! {
+                Function {
+                    name: stringify!(#name),
+                    arguments: &[#(#arguments),*],
+                    ret: #ret,
+                    target_feature: #target_feature,
+                    instrs: &[#(#instrs),*],
+                    file: stringify!(#path),
+                    required_const: &[#(#required_const),*],
+                    has_test: #has_test,
+                }
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let ret = quote! { #input: &[Function] = &[#(#functions),*]; };
+    // println!("{}", ret);
+    ret.into()
+}
+
+/// Translates a parsed Rust type into the tokens of a `&'static Type`
+/// expression for the generated function table.
+///
+/// * A single-identifier path is looked up in the table below and becomes a
+///   reference to the descriptor static of the matching name.
+/// * Raw pointers and references become `&Type::MutPtr(..)` when a `mut` token
+///   is present and `&Type::ConstPtr(..)` otherwise, wrapping the translated
+///   pointee.
+/// * Tuples and `!` map to `&TUPLE` and `&NEVER`.
+///
+/// Panics on any other kind of type and on identifiers missing from the table.
+fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
+    match t {
+        syn::Type::Path(type_path) => match extract_path_ident(&type_path.path).to_string().as_str() {
+            // x86 ...
+            "__m128" => quote! { &M128 },
+            "__m128bh" => quote! { &M128BH },
+            "__m128d" => quote! { &M128D },
+            "__m128i" => quote! { &M128I },
+            "__m256" => quote! { &M256 },
+            "__m256bh" => quote! { &M256BH },
+            "__m256d" => quote! { &M256D },
+            "__m256i" => quote! { &M256I },
+            "__m512" => quote! { &M512 },
+            "__m512bh" => quote! { &M512BH },
+            "__m512d" => quote! { &M512D },
+            "__m512i" => quote! { &M512I },
+            "__mmask8" => quote! { &MMASK8 },
+            "__mmask16" => quote! { &MMASK16 },
+            "__mmask32" => quote! { &MMASK32 },
+            "__mmask64" => quote! { &MMASK64 },
+            "_MM_CMPINT_ENUM" => quote! { &MM_CMPINT_ENUM },
+            "_MM_MANTISSA_NORM_ENUM" => quote! { &MM_MANTISSA_NORM_ENUM },
+            "_MM_MANTISSA_SIGN_ENUM" => quote! { &MM_MANTISSA_SIGN_ENUM },
+            "_MM_PERM_ENUM" => quote! { &MM_PERM_ENUM },
+            "__m64" => quote! { &M64 },
+            "bool" => quote! { &BOOL },
+            "f32" => quote! { &F32 },
+            "f64" => quote! { &F64 },
+            "i16" => quote! { &I16 },
+            "i32" => quote! { &I32 },
+            "i64" => quote! { &I64 },
+            "i8" => quote! { &I8 },
+            "u16" => quote! { &U16 },
+            "u32" => quote! { &U32 },
+            "u64" => quote! { &U64 },
+            "u128" => quote! { &U128 },
+            "u8" => quote! { &U8 },
+            "p8" => quote! { &P8 },
+            "p16" => quote! { &P16 },
+            "Ordering" => quote! { &ORDERING },
+            "CpuidResult" => quote! { &CPUID },
+
+            // arm ...
+            "int8x4_t" => quote! { &I8X4 },
+            "int8x8_t" => quote! { &I8X8 },
+            "int8x8x2_t" => quote! { &I8X8X2 },
+            "int8x8x3_t" => quote! { &I8X8X3 },
+            "int8x8x4_t" => quote! { &I8X8X4 },
+            "int8x16x2_t" => quote! { &I8X16X2 },
+            "int8x16x3_t" => quote! { &I8X16X3 },
+            "int8x16x4_t" => quote! { &I8X16X4 },
+            "int8x16_t" => quote! { &I8X16 },
+            "int16x2_t" => quote! { &I16X2 },
+            "int16x4_t" => quote! { &I16X4 },
+            "int16x4x2_t" => quote! { &I16X4X2 },
+            "int16x4x3_t" => quote! { &I16X4X3 },
+            "int16x4x4_t" => quote! { &I16X4X4 },
+            "int16x8_t" => quote! { &I16X8 },
+            "int16x8x2_t" => quote! { &I16X8X2 },
+            "int16x8x3_t" => quote! { &I16X8X3 },
+            "int16x8x4_t" => quote! { &I16X8X4 },
+            "int32x2_t" => quote! { &I32X2 },
+            "int32x2x2_t" => quote! { &I32X2X2 },
+            "int32x2x3_t" => quote! { &I32X2X3 },
+            "int32x2x4_t" => quote! { &I32X2X4 },
+            "int32x4_t" => quote! { &I32X4 },
+            "int32x4x2_t" => quote! { &I32X4X2 },
+            "int32x4x3_t" => quote! { &I32X4X3 },
+            "int32x4x4_t" => quote! { &I32X4X4 },
+            "int64x1_t" => quote! { &I64X1 },
+            "int64x1x2_t" => quote! { &I64X1X2 },
+            "int64x1x3_t" => quote! { &I64X1X3 },
+            "int64x1x4_t" => quote! { &I64X1X4 },
+            "int64x2_t" => quote! { &I64X2 },
+            "int64x2x2_t" => quote! { &I64X2X2 },
+            "int64x2x3_t" => quote! { &I64X2X3 },
+            "int64x2x4_t" => quote! { &I64X2X4 },
+            "uint8x8_t" => quote! { &U8X8 },
+            "uint8x4_t" => quote! { &U8X4 },
+            "uint8x8x2_t" => quote! { &U8X8X2 },
+            "uint8x16x2_t" => quote! { &U8X16X2 },
+            "uint8x16x3_t" => quote! { &U8X16X3 },
+            "uint8x16x4_t" => quote! { &U8X16X4 },
+            "uint8x8x3_t" => quote! { &U8X8X3 },
+            "uint8x8x4_t" => quote! { &U8X8X4 },
+            "uint8x16_t" => quote! { &U8X16 },
+            "uint16x4_t" => quote! { &U16X4 },
+            "uint16x4x2_t" => quote! { &U16X4X2 },
+            "uint16x4x3_t" => quote! { &U16X4X3 },
+            "uint16x4x4_t" => quote! { &U16X4X4 },
+            "uint16x8_t" => quote! { &U16X8 },
+            "uint16x8x2_t" => quote! { &U16X8X2 },
+            "uint16x8x3_t" => quote! { &U16X8X3 },
+            "uint16x8x4_t" => quote! { &U16X8X4 },
+            "uint32x2_t" => quote! { &U32X2 },
+            "uint32x2x2_t" => quote! { &U32X2X2 },
+            "uint32x2x3_t" => quote! { &U32X2X3 },
+            "uint32x2x4_t" => quote! { &U32X2X4 },
+            "uint32x4_t" => quote! { &U32X4 },
+            "uint32x4x2_t" => quote! { &U32X4X2 },
+            "uint32x4x3_t" => quote! { &U32X4X3 },
+            "uint32x4x4_t" => quote! { &U32X4X4 },
+            "uint64x1_t" => quote! { &U64X1 },
+            "uint64x1x2_t" => quote! { &U64X1X2 },
+            "uint64x1x3_t" => quote! { &U64X1X3 },
+            "uint64x1x4_t" => quote! { &U64X1X4 },
+            "uint64x2_t" => quote! { &U64X2 },
+            "uint64x2x2_t" => quote! { &U64X2X2 },
+            "uint64x2x3_t" => quote! { &U64X2X3 },
+            "uint64x2x4_t" => quote! { &U64X2X4 },
+            "float32x2_t" => quote! { &F32X2 },
+            "float32x2x2_t" => quote! { &F32X2X2 },
+            "float32x2x3_t" => quote! { &F32X2X3 },
+            "float32x2x4_t" => quote! { &F32X2X4 },
+            "float32x4_t" => quote! { &F32X4 },
+            "float32x4x2_t" => quote! { &F32X4X2 },
+            "float32x4x3_t" => quote! { &F32X4X3 },
+            "float32x4x4_t" => quote! { &F32X4X4 },
+            "float64x1_t" => quote! { &F64X1 },
+            "float64x1x2_t" => quote! { &F64X1X2 },
+            "float64x1x3_t" => quote! { &F64X1X3 },
+            "float64x1x4_t" => quote! { &F64X1X4 },
+            "float64x2_t" => quote! { &F64X2 },
+            "float64x2x2_t" => quote! { &F64X2X2 },
+            "float64x2x3_t" => quote! { &F64X2X3 },
+            "float64x2x4_t" => quote! { &F64X2X4 },
+            "poly8x8_t" => quote! { &POLY8X8 },
+            "poly8x8x2_t" => quote! { &POLY8X8X2 },
+            "poly8x8x3_t" => quote! { &POLY8X8X3 },
+            "poly8x8x4_t" => quote! { &POLY8X8X4 },
+            "poly8x16x2_t" => quote! { &POLY8X16X2 },
+            "poly8x16x3_t" => quote! { &POLY8X16X3 },
+            "poly8x16x4_t" => quote! { &POLY8X16X4 },
+            "p64" => quote! { &P64 },
+            "poly64x1_t" => quote! { &POLY64X1 },
+            "poly64x2_t" => quote! { &POLY64X2 },
+            "poly8x16_t" => quote! { &POLY8X16 },
+            "poly16x4_t" => quote! { &POLY16X4 },
+            "poly16x4x2_t" => quote! { &P16X4X2 },
+            "poly16x4x3_t" => quote! { &P16X4X3 },
+            "poly16x4x4_t" => quote! { &P16X4X4 },
+            "poly16x8_t" => quote! { &POLY16X8 },
+            "poly16x8x2_t" => quote! { &P16X8X2 },
+            "poly16x8x3_t" => quote! { &P16X8X3 },
+            "poly16x8x4_t" => quote! { &P16X8X4 },
+            "poly64x1x2_t" => quote! { &P64X1X2 },
+            "poly64x1x3_t" => quote! { &P64X1X3 },
+            "poly64x1x4_t" => quote! { &P64X1X4 },
+            "poly64x2x2_t" => quote! { &P64X2X2 },
+            "poly64x2x3_t" => quote! { &P64X2X3 },
+            "poly64x2x4_t" => quote! { &P64X2X4 },
+            "p128" => quote! { &P128 },
+
+            "v16i8" => quote! { &v16i8 },
+            "v8i16" => quote! { &v8i16 },
+            "v4i32" => quote! { &v4i32 },
+            "v2i64" => quote! { &v2i64 },
+            "v16u8" => quote! { &v16u8 },
+            "v8u16" => quote! { &v8u16 },
+            "v4u32" => quote! { &v4u32 },
+            "v2u64" => quote! { &v2u64 },
+            "v8f16" => quote! { &v8f16 },
+            "v4f32" => quote! { &v4f32 },
+            "v2f64" => quote! { &v2f64 },
+
+            other => panic!("unsupported type: \"{}\"", other),
+        },
+
+        syn::Type::Ptr(syn::TypePtr {
+            elem, mutability, ..
+        })
+        | syn::Type::Reference(syn::TypeReference {
+            elem, mutability, ..
+        }) => {
+            // `*mut T` and `&mut T` carry a `mut` token; `*const T` and `&T`
+            // do not and are both described as "const" pointers.
+            let pointee = to_type(elem);
+            if mutability.is_some() {
+                quote! { &Type::MutPtr(#pointee) }
+            } else {
+                quote! { &Type::ConstPtr(#pointee) }
+            }
+        }
+
+        syn::Type::Tuple(_) => quote! { &TUPLE },
+        syn::Type::Never(_) => quote! { &NEVER },
+        syn::Type::Slice(_) => panic!("unsupported slice"),
+        syn::Type::Array(_) => panic!("unsupported array"),
+        _ => panic!("unsupported type"),
+    }
+}
+
+/// Returns the identifier of a path that consists of exactly one plain
+/// segment (no leading `::`, no generic or parenthesized arguments).
+///
+/// Panics for every other path shape, since resolving those would require
+/// real name resolution.
+fn extract_path_ident(path: &syn::Path) -> syn::Ident {
+    if path.leading_colon.is_some() {
+        panic!("unsupported leading colon in path")
+    }
+    if path.segments.len() != 1 {
+        panic!("unsupported path that needs name resolution")
+    }
+    // The length check above guarantees this segment exists.
+    let segment = path.segments.first().expect("segment not found");
+    match segment.arguments {
+        syn::PathArguments::None => segment.ident.clone(),
+        _ => panic!("unsupported path that has path arguments"),
+    }
+}
+
+/// Recursively collects every `*.rs` file below `root`, parses it with `syn`
+/// and appends `(parsed file, displayed path)` to `files`.
+///
+/// Files named `test.rs` are skipped. Any I/O or parse failure panics; the
+/// panic message names the offending path and the underlying error so the
+/// failure can be located from the macro's compile error.
+fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) {
+    let entries = root
+        .read_dir()
+        .unwrap_or_else(|e| panic!("can't read directory {}: {}", root.display(), e));
+    for entry in entries {
+        let entry = entry
+            .unwrap_or_else(|e| panic!("can't read entry in {}: {}", root.display(), e));
+        let path = entry.path();
+        let file_type = entry
+            .file_type()
+            .unwrap_or_else(|e| panic!("can't get file type of {}: {}", path.display(), e));
+        if file_type.is_dir() {
+            walk(&path, files);
+            continue;
+        }
+        if path.extension().and_then(std::ffi::OsStr::to_str) != Some("rs") {
+            continue;
+        }
+
+        if path.file_name().and_then(std::ffi::OsStr::to_str) == Some("test.rs") {
+            continue;
+        }
+
+        let mut contents = String::new();
+        File::open(&path)
+            .unwrap_or_else(|e| panic!("can't open file at path: {}: {}", path.display(), e))
+            .read_to_string(&mut contents)
+            .unwrap_or_else(|e| panic!("failed to read file {} to string: {}", path.display(), e));
+
+        let parsed = syn::parse_str::<syn::File>(&contents)
+            .unwrap_or_else(|e| panic!("failed to parse {}: {}", path.display(), e));
+        files.push((parsed, path.display().to_string()));
+    }
+}
+
+/// Collects the instruction named by each
+/// `#[cfg_attr(<cfg>, assert_instr(<instruction> ...))]` attribute in `attrs`.
+///
+/// `cfg_attr` attributes whose payload does not have that shape are silently
+/// ignored; one string is returned per attribute that does.
+fn find_instrs(attrs: &[syn::Attribute]) -> Vec<String> {
+    struct AssertInstr {
+        instr: String,
+    }
+
+    // A small custom parser to parse out the instruction in `assert_instr`.
+    //
+    // TODO: should probably just reuse `Invoc` from the `assert-instr-macro`
+    // crate.
+    impl syn::parse::Parse for AssertInstr {
+        fn parse(stream: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+            // Outer payload: `(<cfg meta>, assert_instr(...))`.
+            let cfg_args;
+            parenthesized!(cfg_args in stream);
+            let _ = cfg_args.parse::<syn::Meta>()?;
+            let _ = cfg_args.parse::<Token![,]>()?;
+            let macro_name = cfg_args.parse::<syn::Ident>()?;
+            if macro_name != "assert_instr" {
+                return Err(cfg_args.error("expected `assert_instr`"));
+            }
+            let instr_args;
+            parenthesized!(instr_args in cfg_args);
+
+            // The instruction is spelled as a mix of string literals,
+            // identifiers and dots; glue the pieces together. The first comma
+            // ends the instruction and everything after it is discarded.
+            let mut text = String::new();
+            while !instr_args.is_empty() {
+                if let Ok(lit) = instr_args.parse::<syn::LitStr>() {
+                    text.push_str(&lit.value());
+                    continue;
+                }
+                if let Ok(word) = instr_args.call(syn::Ident::parse_any) {
+                    text.push_str(&word.to_string());
+                    continue;
+                }
+                if instr_args.parse::<Token![.]>().is_ok() {
+                    text.push('.');
+                    continue;
+                }
+                if instr_args.parse::<Token![,]>().is_ok() {
+                    // consume everything remaining
+                    drop(instr_args.parse::<proc_macro2::TokenStream>());
+                    break;
+                }
+                return Err(cfg_args.error("failed to parse instruction"));
+            }
+            Ok(Self { instr: text })
+        }
+    }
+
+    let mut found = Vec::new();
+    for attr in attrs {
+        if !attr.path.is_ident("cfg_attr") {
+            continue;
+        }
+        if let Ok(parsed) = syn::parse2::<AssertInstr>(attr.tokens.clone()) {
+            found.push(parsed.instr);
+        }
+    }
+    found
+}
+
+/// Returns the literal of the first `enable = <lit>` entry found inside any
+/// `#[target_feature(...)]` attribute in `attrs`, or `None` if there is none.
+///
+/// Attributes that fail to parse as a meta item are ignored.
+fn find_target_feature(attrs: &[syn::Attribute]) -> Option<syn::Lit> {
+    attrs
+        .iter()
+        // Flatten the nested items of every `target_feature(...)` list;
+        // anything else contributes an empty list.
+        .flat_map(|attr| match attr.parse_meta() {
+            Ok(syn::Meta::List(list)) if list.path.is_ident("target_feature") => list.nested,
+            _ => syn::punctuated::Punctuated::new(),
+        })
+        // Bare literals and non-`enable` entries are skipped.
+        .find_map(|nested| match nested {
+            syn::NestedMeta::Meta(syn::Meta::NameValue(pair)) if pair.path.is_ident("enable") => {
+                Some(pair.lit)
+            }
+            _ => None,
+        })
+}
+
+/// Gathers the integer arguments of every attribute in `attrs` whose first
+/// path segment is `name`, e.g. `#[name(1, 2)]` contributes `1` and `2`.
+///
+/// Panics if a matching attribute's payload is not a parenthesized,
+/// comma-separated list of integer literals.
+fn find_required_const(name: &str, attrs: &[syn::Attribute]) -> Vec<usize> {
+    let mut indices = Vec::new();
+    for attr in attrs {
+        if attr.path.segments[0].ident != name {
+            continue;
+        }
+        let parsed = syn::parse2::<RustcArgsRequiredConst>(attr.tokens.clone()).unwrap();
+        indices.extend(parsed.args);
+    }
+    indices
+}
+
+/// Parsed payload of an attribute of the form `(0, 2, ...)`: a parenthesized,
+/// comma-separated list of integer literals.
+struct RustcArgsRequiredConst {
+    args: Vec<usize>,
+}
+
+impl syn::parse::Parse for RustcArgsRequiredConst {
+    fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> {
+        let inner;
+        parenthesized!(inner in input);
+        let literals =
+            syn::punctuated::Punctuated::<syn::LitInt, Token![,]>::parse_terminated(&inner)?;
+
+        // Convert each literal, bailing out on the first one that is not a
+        // valid base-10 `usize`.
+        let mut args = Vec::with_capacity(literals.len());
+        for literal in literals {
+            args.push(literal.base10_parse::<usize>()?);
+        }
+        Ok(Self { args })
+    }
+}
diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs
new file mode 100644
index 000000000..6ce5ce05f
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs
@@ -0,0 +1,988 @@
+#![allow(bad_style)]
+#![allow(unused)]
+use std::{collections::HashMap, rc::Rc};
+
+use html5ever::{
+    driver::ParseOpts,
+    parse_document,
+    rcdom::{Node, NodeData, RcDom},
+    tendril::TendrilSink,
+    tree_builder::TreeBuilderOpts,
+};
+
+// Description of one intrinsic as emitted by the `stdarch_verify` macro. The
+// field names must match the `Function { .. }` literal that the macro
+// generates.
+struct Function {
+    // Identifier of the function, produced with `stringify!`.
+    name: &'static str,
+    // Argument types in order; const generic parameters are spliced in at the
+    // positions listed by `rustc_legacy_const_generics`.
+    arguments: &'static [&'static Type],
+    // Return type, or `None` when the function has no explicit return type.
+    ret: Option<&'static Type>,
+    // Value of `enable = ...` from `#[target_feature(...)]`, if present.
+    target_feature: Option<&'static str>,
+    // Instructions named by `cfg_attr(..., assert_instr(...))` attributes.
+    instrs: &'static [&'static str],
+    // Path of the source file the function was found in.
+    file: &'static str,
+    // Indices taken from `rustc_args_required_const` or
+    // `rustc_legacy_const_generics`.
+    required_const: &'static [usize],
+    // Whether a function named `test_<name>` exists, where `<name>` is the
+    // function name with leading underscores stripped.
+    has_test: bool,
+}
+
+// Descriptors for scalar types. The macro-generated table refers to these
+// statics by name (e.g. the Rust type `f32` is emitted as `&F32`), so the
+// names must stay in sync with the macro's type table.
+static F16: Type = Type::PrimFloat(16);
+static F32: Type = Type::PrimFloat(32);
+static F64: Type = Type::PrimFloat(64);
+static I16: Type = Type::PrimSigned(16);
+static I32: Type = Type::PrimSigned(32);
+static I64: Type = Type::PrimSigned(64);
+static I8: Type = Type::PrimSigned(8);
+static U16: Type = Type::PrimUnsigned(16);
+static U32: Type = Type::PrimUnsigned(32);
+static U64: Type = Type::PrimUnsigned(64);
+static U8: Type = Type::PrimUnsigned(8);
+static NEVER: Type = Type::Never;
+
+// Descriptors for vector types, plus the scalar polynomial types
+// (`P8`/`P16`/`P64`/`P128`). The three numbers of a vector descriptor mirror
+// its name: `I16X4X2` is `Type::I(16, 4, 2)` and a name without a third
+// component uses 1. Presumably these are element width in bits, lanes per
+// vector and number of vectors -- confirm against the consumers of `Type`.
+static F16X4: Type = Type::F(16, 4, 1);
+static F16X4X2: Type = Type::F(16, 4, 2);
+static F16X4X3: Type = Type::F(16, 4, 3);
+static F16X4X4: Type = Type::F(16, 4, 4);
+static F16X8: Type = Type::F(16, 8, 1);
+static F16X8X2: Type = Type::F(16, 8, 2);
+static F16X8X3: Type = Type::F(16, 8, 3);
+static F16X8X4: Type = Type::F(16, 8, 4);
+static F32X2: Type = Type::F(32, 2, 1);
+static F32X2X2: Type = Type::F(32, 2, 2);
+static F32X2X3: Type = Type::F(32, 2, 3);
+static F32X2X4: Type = Type::F(32, 2, 4);
+static F32X4: Type = Type::F(32, 4, 1);
+static F32X4X2: Type = Type::F(32, 4, 2);
+static F32X4X3: Type = Type::F(32, 4, 3);
+static F32X4X4: Type = Type::F(32, 4, 4);
+static F64X1: Type = Type::F(64, 1, 1);
+static F64X1X2: Type = Type::F(64, 1, 2);
+static F64X1X3: Type = Type::F(64, 1, 3);
+static F64X1X4: Type = Type::F(64, 1, 4);
+static F64X2: Type = Type::F(64, 2, 1);
+static F64X2X2: Type = Type::F(64, 2, 2);
+static F64X2X3: Type = Type::F(64, 2, 3);
+static F64X2X4: Type = Type::F(64, 2, 4);
+static I16X2: Type = Type::I(16, 2, 1);
+static I16X4: Type = Type::I(16, 4, 1);
+static I16X4X2: Type = Type::I(16, 4, 2);
+static I16X4X3: Type = Type::I(16, 4, 3);
+static I16X4X4: Type = Type::I(16, 4, 4);
+static I16X8: Type = Type::I(16, 8, 1);
+static I16X8X2: Type = Type::I(16, 8, 2);
+static I16X8X3: Type = Type::I(16, 8, 3);
+static I16X8X4: Type = Type::I(16, 8, 4);
+static I32X2: Type = Type::I(32, 2, 1);
+static I32X2X2: Type = Type::I(32, 2, 2);
+static I32X2X3: Type = Type::I(32, 2, 3);
+static I32X2X4: Type = Type::I(32, 2, 4);
+static I32X4: Type = Type::I(32, 4, 1);
+static I32X4X2: Type = Type::I(32, 4, 2);
+static I32X4X3: Type = Type::I(32, 4, 3);
+static I32X4X4: Type = Type::I(32, 4, 4);
+static I64X1: Type = Type::I(64, 1, 1);
+static I64X1X2: Type = Type::I(64, 1, 2);
+static I64X1X3: Type = Type::I(64, 1, 3);
+static I64X1X4: Type = Type::I(64, 1, 4);
+static I64X2: Type = Type::I(64, 2, 1);
+static I64X2X2: Type = Type::I(64, 2, 2);
+static I64X2X3: Type = Type::I(64, 2, 3);
+static I64X2X4: Type = Type::I(64, 2, 4);
+static I8X16: Type = Type::I(8, 16, 1);
+static I8X16X2: Type = Type::I(8, 16, 2);
+static I8X16X3: Type = Type::I(8, 16, 3);
+static I8X16X4: Type = Type::I(8, 16, 4);
+static I8X4: Type = Type::I(8, 4, 1);
+static I8X8: Type = Type::I(8, 8, 1);
+static I8X8X2: Type = Type::I(8, 8, 2);
+static I8X8X3: Type = Type::I(8, 8, 3);
+static I8X8X4: Type = Type::I(8, 8, 4);
+static P128: Type = Type::PrimPoly(128);
+static P16: Type = Type::PrimPoly(16);
+static P16X4X2: Type = Type::P(16, 4, 2);
+static P16X4X3: Type = Type::P(16, 4, 3);
+static P16X4X4: Type = Type::P(16, 4, 4);
+static P16X8X2: Type = Type::P(16, 8, 2);
+static P16X8X3: Type = Type::P(16, 8, 3);
+static P16X8X4: Type = Type::P(16, 8, 4);
+static P64: Type = Type::PrimPoly(64);
+static P64X1X2: Type = Type::P(64, 1, 2);
+static P64X1X3: Type = Type::P(64, 1, 3);
+static P64X1X4: Type = Type::P(64, 1, 4);
+static P64X2X2: Type = Type::P(64, 2, 2);
+static P64X2X3: Type = Type::P(64, 2, 3);
+static P64X2X4: Type = Type::P(64, 2, 4);
+static P8: Type = Type::PrimPoly(8);
+static POLY16X4: Type = Type::P(16, 4, 1);
+static POLY16X8: Type = Type::P(16, 8, 1);
+static POLY64X1: Type = Type::P(64, 1, 1);
+static POLY64X2: Type = Type::P(64, 2, 1);
+static POLY8X16: Type = Type::P(8, 16, 1);
+static POLY8X16X2: Type = Type::P(8, 16, 2);
+static POLY8X16X3: Type = Type::P(8, 16, 3);
+static POLY8X16X4: Type = Type::P(8, 16, 4);
+static POLY8X8: Type = Type::P(8, 8, 1);
+static POLY8X8X2: Type = Type::P(8, 8, 2);
+static POLY8X8X3: Type = Type::P(8, 8, 3);
+static POLY8X8X4: Type = Type::P(8, 8, 4);
+static U16X4: Type = Type::U(16, 4, 1);
+static U16X4X2: Type = Type::U(16, 4, 2);
+static U16X4X3: Type = Type::U(16, 4, 3);
+static U16X4X4: Type = Type::U(16, 4, 4);
+static U16X8: Type = Type::U(16, 8, 1);
+static U16X8X2: Type = Type::U(16, 8, 2);
+static U16X8X3: Type = Type::U(16, 8, 3);
+static U16X8X4: Type = Type::U(16, 8, 4);
+static U32X2: Type = Type::U(32, 2, 1);
+static U32X2X2: Type = Type::U(32, 2, 2);
+static U32X2X3: Type = Type::U(32, 2, 3);
+static U32X2X4: Type = Type::U(32, 2, 4);
+static U32X4: Type = Type::U(32, 4, 1);
+static U32X4X2: Type = Type::U(32, 4, 2);
+static U32X4X3: Type = Type::U(32, 4, 3);
+static U32X4X4: Type = Type::U(32, 4, 4);
+static U64X1: Type = Type::U(64, 1, 1);
+static U64X1X2: Type = Type::U(64, 1, 2);
+static U64X1X3: Type = Type::U(64, 1, 3);
+static U64X1X4: Type = Type::U(64, 1, 4);
+static U64X2: Type = Type::U(64, 2, 1);
+static U64X2X2: Type = Type::U(64, 2, 2);
+static U64X2X3: Type = Type::U(64, 2, 3);
+static U64X2X4: Type = Type::U(64, 2, 4);
+static U8X16: Type = Type::U(8, 16, 1);
+static U8X16X2: Type = Type::U(8, 16, 2);
+static U8X16X3: Type = Type::U(8, 16, 3);
+static U8X16X4: Type = Type::U(8, 16, 4);
+static U8X8: Type = Type::U(8, 8, 1);
+static U8X4: Type = Type::U(8, 4, 1);
+static U8X8X2: Type = Type::U(8, 8, 2);
+static U8X8X3: Type = Type::U(8, 8, 3);
+static U8X8X4: Type = Type::U(8, 8, 4);
+
+// Structural description of a Rust type appearing in an intrinsic signature.
+#[derive(Debug, Copy, Clone, PartialEq)]
+enum Type {
+    // Scalar float/signed/unsigned/polynomial types; the payload is the number
+    // from the type name (e.g. `F32` is `PrimFloat(32)`).
+    PrimFloat(u8),
+    PrimSigned(u8),
+    PrimUnsigned(u8),
+    PrimPoly(u8),
+    // Pointer or reference with a `mut` token (`*mut T`, `&mut T`).
+    MutPtr(&'static Type),
+    // Pointer or reference without a `mut` token (`*const T`, `&T`).
+    ConstPtr(&'static Type),
+    // Signed, unsigned, polynomial and float vector types. The fields follow
+    // the numbers in the descriptor name (`I16X4X2` is `I(16, 4, 2)`);
+    // presumably element bits, lanes and vector count -- confirm.
+    I(u8, u8, u8),
+    U(u8, u8, u8),
+    P(u8, u8, u8),
+    F(u8, u8, u8),
+    // The never type `!`.
+    Never,
+}
+
+// Invokes the `arm_functions!` procedural macro from `stdarch_verify` to
+// define `FUNCTIONS`, which `verify_all_signatures` iterates over.
+// NOTE(review): presumably expands to `static FUNCTIONS: &[Function] = &[..];`
+// with one entry per intrinsic found in the crate sources -- confirm against
+// the macro definition.
+stdarch_verify::arm_functions!(static FUNCTIONS);
+
+// Early-returns `Err(format!(..))` from the enclosing function; accepts the
+// same arguments as `format!`.
+macro_rules! bail {
+    ($($t:tt)*) => (return Err(format!($($t)*)))
+}
+
+#[test]
+fn verify_all_signatures() {
+    // This is a giant HTML blob downloaded from
+    // https://developer.arm.com/technologies/neon/intrinsics which contains all
+    // NEON intrinsics at least. We do manual HTML parsing below.
+    let html = include_bytes!("../arm-intrinsics.html");
+    let mut html = &html[..];
+    let opts = ParseOpts {
+        tree_builder: TreeBuilderOpts {
+            drop_doctype: true,
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    let dom = parse_document(RcDom::default(), opts)
+        .from_utf8()
+        .read_from(&mut html)
+        .unwrap();
+
+    let accordion = find_accordion(&dom.document).unwrap();
+    let map = parse_intrinsics(&accordion);
+
+    let mut all_valid = true;
+    'outer: for rust in FUNCTIONS {
+        if !rust.has_test {
+            let skip = [
+                "vaddq_s64",
+                "vaddq_u64",
+                "vrsqrte_f32",
+                "vtbl1_s8",
+                "vtbl1_u8",
+                "vtbl1_p8",
+                "vtbl2_s8",
+                "vtbl2_u8",
+                "vtbl2_p8",
+                "vtbl3_s8",
+                "vtbl3_u8",
+                "vtbl3_p8",
+                "vtbl4_s8",
+                "vtbl4_u8",
+                "vtbl4_p8",
+                "vtbx1_s8",
+                "vtbx1_u8",
+                "vtbx1_p8",
+                "vtbx2_s8",
+                "vtbx2_u8",
+                "vtbx2_p8",
+                "vtbx3_s8",
+                "vtbx3_u8",
+                "vtbx3_p8",
+                "vtbx4_s8",
+                "vtbx4_u8",
+                "vtbx4_p8",
+                "udf",
+                "_clz_u8",
+                "_clz_u16",
+                "_clz_u32",
+                "_rbit_u32",
+                "_rev_u16",
+                "_rev_u32",
+                "__breakpoint",
+                "vpminq_f32",
+                "vpminq_f64",
+                "vpmaxq_f32",
+                "vpmaxq_f64",
+                "vcombine_s8",
+                "vcombine_s16",
+                "vcombine_s32",
+                "vcombine_s64",
+                "vcombine_u8",
+                "vcombine_u16",
+                "vcombine_u32",
+                "vcombine_u64",
+                "vcombine_p64",
+                "vcombine_f32",
+                "vcombine_p8",
+                "vcombine_p16",
+                "vcombine_f64",
+                "vtbl1_s8",
+                "vtbl1_u8",
+                "vtbl1_p8",
+                "vtbl2_s8",
+                "vtbl2_u8",
+                "vtbl2_p8",
+                "vtbl3_s8",
+                "vtbl3_u8",
+                "vtbl3_p8",
+                "vtbl4_s8",
+                "vtbl4_u8",
+                "vtbl4_p8",
+                "vtbx1_s8",
+                "vtbx1_u8",
+                "vtbx1_p8",
+                "vtbx2_s8",
+                "vtbx2_u8",
+                "vtbx2_p8",
+                "vtbx3_s8",
+                "vtbx3_u8",
+                "vtbx3_p8",
+                "vtbx4_s8",
+                "vtbx4_u8",
+                "vtbx4_p8",
+                "vqtbl1_s8",
+                "vqtbl1q_s8",
+                "vqtbl1_u8",
+                "vqtbl1q_u8",
+                "vqtbl1_p8",
+                "vqtbl1q_p8",
+                "vqtbx1_s8",
+                "vqtbx1q_s8",
+                "vqtbx1_u8",
+                "vqtbx1q_u8",
+                "vqtbx1_p8",
+                "vqtbx1q_p8",
+                "vqtbl2_s8",
+                "vqtbl2q_s8",
+                "vqtbl2_u8",
+                "vqtbl2q_u8",
+                "vqtbl2_p8",
+                "vqtbl2q_p8",
+                "vqtbx2_s8",
+                "vqtbx2q_s8",
+                "vqtbx2_u8",
+                "vqtbx2q_u8",
+                "vqtbx2_p8",
+                "vqtbx2q_p8",
+                "vqtbl3_s8",
+                "vqtbl3q_s8",
+                "vqtbl3_u8",
+                "vqtbl3q_u8",
+                "vqtbl3_p8",
+                "vqtbl3q_p8",
+                "vqtbx3_s8",
+                "vqtbx3q_s8",
+                "vqtbx3_u8",
+                "vqtbx3q_u8",
+                "vqtbx3_p8",
+                "vqtbx3q_p8",
+                "vqtbl4_s8",
+                "vqtbl4q_s8",
+                "vqtbl4_u8",
+                "vqtbl4q_u8",
+                "vqtbl4_p8",
+                "vqtbl4q_p8",
+                "vqtbx4_s8",
+                "vqtbx4q_s8",
+                "vqtbx4_u8",
+                "vqtbx4q_u8",
+                "vqtbx4_p8",
+                "vqtbx4q_p8",
+                "brk",
+                "_rev_u64",
+                "_clz_u64",
+                "_rbit_u64",
+                "_cls_u32",
+                "_cls_u64",
+                "_prefetch",
+                "vsli_n_s8",
+                "vsliq_n_s8",
+                "vsli_n_s16",
+                "vsliq_n_s16",
+                "vsli_n_s32",
+                "vsliq_n_s32",
+                "vsli_n_s64",
+                "vsliq_n_s64",
+                "vsli_n_u8",
+                "vsliq_n_u8",
+                "vsli_n_u16",
+                "vsliq_n_u16",
+                "vsli_n_u32",
+                "vsliq_n_u32",
+                "vsli_n_u64",
+                "vsliq_n_u64",
+                "vsli_n_p8",
+                "vsliq_n_p8",
+                "vsli_n_p16",
+                "vsliq_n_p16",
+                "vsli_n_p64",
+                "vsliq_n_p64",
+                "vsri_n_s8",
+                "vsriq_n_s8",
+                "vsri_n_s16",
+                "vsriq_n_s16",
+                "vsri_n_s32",
+                "vsriq_n_s32",
+                "vsri_n_s64",
+                "vsriq_n_s64",
+                "vsri_n_u8",
+                "vsriq_n_u8",
+                "vsri_n_u16",
+                "vsriq_n_u16",
+                "vsri_n_u32",
+                "vsriq_n_u32",
+                "vsri_n_u64",
+                "vsriq_n_u64",
+                "vsri_n_p8",
+                "vsriq_n_p8",
+                "vsri_n_p16",
+                "vsriq_n_p16",
+                "vsri_n_p64",
+                "vsriq_n_p64",
+                "__smulbb",
+                "__smultb",
+                "__smulbt",
+                "__smultt",
+                "__smulwb",
+                "__smulwt",
+                "__qadd",
+                "__qsub",
+                "__qdbl",
+                "__smlabb",
+                "__smlabt",
+                "__smlatb",
+                "__smlatt",
+                "__smlawb",
+                "__smlawt",
+                "__qadd8",
+                "__qsub8",
+                "__qsub16",
+                "__qadd16",
+                "__qasx",
+                "__qsax",
+                "__sadd16",
+                "__sadd8",
+                "__smlad",
+                "__smlsd",
+                "__sasx",
+                "__sel",
+                "__shadd8",
+                "__shadd16",
+                "__shsub8",
+                "__usub8",
+                "__ssub8",
+                "__shsub16",
+                "__smuad",
+                "__smuadx",
+                "__smusd",
+                "__smusdx",
+                "__usad8",
+                "__usada8",
+                "__ldrex",
+                "__strex",
+                "__ldrexb",
+                "__strexb",
+                "__ldrexh",
+                "__strexh",
+                "__clrex",
+                "__dbg",
+            ];
+            if !skip.contains(&rust.name) {
+                println!(
+                    "missing run-time test named `test_{}` for `{}`",
+                    {
+                        let mut id = rust.name;
+                        while id.starts_with('_') {
+                            id = &id[1..];
+                        }
+                        id
+                    },
+                    rust.name
+                );
+                all_valid = false;
+            }
+        }
+
+        // Skip some intrinsics that aren't NEON and are located in different
+        // places than the whitelists below.
+        match rust.name {
+            "brk" | "__breakpoint" | "udf" | "_prefetch" => continue,
+            _ => {}
+        }
+        // Skip some intrinsics that are present in GCC and Clang but
+        // are missing from the official documentation.
+        let skip_intrinsic_verify = [
+            "vmov_n_p64",
+            "vmovq_n_p64",
+            "vreinterpret_p64_s64",
+            "vreinterpret_f32_p64",
+            "vreinterpretq_f32_p64",
+            "vreinterpretq_p64_p128",
+            "vreinterpretq_p128_p64",
+            "vreinterpretq_f32_p128",
+            "vqrdmlahh_s16",
+            "vqrdmlahs_s32",
+            "vqrdmlahh_lane_s16",
+            "vqrdmlahh_laneq_s16",
+            "vqrdmlahs_lane_s32",
+            "vqrdmlahs_laneq_s32",
+            "vqrdmlah_s16",
+            "vqrdmlah_s32",
+            "vqrdmlahq_s16",
+            "vqrdmlahq_s32",
+            "vqrdmlah_lane_s16",
+            "vqrdmlah_laneq_s16",
+            "vqrdmlahq_lane_s16",
+            "vqrdmlahq_laneq_s16",
+            "vqrdmlah_lane_s32",
+            "vqrdmlah_laneq_s32",
+            "vqrdmlahq_lane_s32",
+            "vqrdmlahq_laneq_s32",
+            "vqrdmlshh_s16",
+            "vqrdmlshs_s32",
+            "vqrdmlshh_lane_s16",
+            "vqrdmlshh_laneq_s16",
+            "vqrdmlshs_lane_s32",
+            "vqrdmlshs_laneq_s32",
+            "vqrdmlsh_s16",
+            "vqrdmlshq_s16",
+            "vqrdmlsh_s32",
+            "vqrdmlshq_s32",
+            "vqrdmlsh_lane_s16",
+            "vqrdmlsh_laneq_s16",
+            "vqrdmlshq_lane_s16",
+            "vqrdmlshq_laneq_s16",
+            "vqrdmlsh_lane_s32",
+            "vqrdmlsh_laneq_s32",
+            "vqrdmlshq_lane_s32",
+            "vqrdmlshq_laneq_s32",
+            "vcadd_rot270_f32",
+            "vcadd_rot90_f32",
+            "vcaddq_rot270_f32",
+            "vcaddq_rot270_f64",
+            "vcaddq_rot90_f32",
+            "vcaddq_rot90_f64",
+            "vcmla_f32",
+            "vcmlaq_f32",
+            "vcmlaq_f64",
+            "vcmla_rot90_f32",
+            "vcmlaq_rot90_f32",
+            "vcmlaq_rot90_f64",
+            "vcmla_rot180_f32",
+            "vcmlaq_rot180_f32",
+            "vcmlaq_rot180_f64",
+            "vcmla_rot270_f32",
+            "vcmlaq_rot270_f32",
+            "vcmlaq_rot270_f64",
+            "vcmla_lane_f32",
+            "vcmla_laneq_f32",
+            "vcmlaq_lane_f32",
+            "vcmlaq_laneq_f32",
+            "vcmla_rot90_lane_f32",
+            "vcmla_rot90_laneq_f32",
+            "vcmlaq_rot90_lane_f32",
+            "vcmlaq_rot90_laneq_f32",
+            "vcmla_rot180_lane_f32",
+            "vcmla_rot180_laneq_f32",
+            "vcmlaq_rot180_lane_f32",
+            "vcmlaq_rot180_laneq_f32",
+            "vcmla_rot270_lane_f32",
+            "vcmla_rot270_laneq_f32",
+            "vcmlaq_rot270_lane_f32",
+            "vcmlaq_rot270_laneq_f32",
+            "vdot_s32",
+            "vdot_u32",
+            "vdotq_s32",
+            "vdotq_u32",
+            "vdot_lane_s32",
+            "vdot_laneq_s32",
+            "vdotq_lane_s32",
+            "vdotq_laneq_s32",
+            "vdot_lane_u32",
+            "vdot_laneq_u32",
+            "vdotq_lane_u32",
+            "vdotq_laneq_u32",
+            "vbcaxq_s8",
+            "vbcaxq_s16",
+            "vbcaxq_s32",
+            "vbcaxq_s64",
+            "vbcaxq_u8",
+            "vbcaxq_u16",
+            "vbcaxq_u32",
+            "vbcaxq_u64",
+            "veor3q_s8",
+            "veor3q_s16",
+            "veor3q_s32",
+            "veor3q_s64",
+            "veor3q_u8",
+            "veor3q_u16",
+            "veor3q_u32",
+            "veor3q_u64",
+            "vadd_p8",
+            "vadd_p16",
+            "vadd_p64",
+            "vaddq_p8",
+            "vaddq_p16",
+            "vaddq_p64",
+            "vaddq_p128",
+            "vsm4ekeyq_u32",
+            "vsm4eq_u32",
+            "vmmlaq_s32",
+            "vmmlaq_u32",
+            "vusmmlaq_s32",
+            "vsm3partw1q_u32",
+            "vsm3partw2q_u32",
+            "vsm3ss1q_u32",
+            "vsm3tt1aq_u32",
+            "vsm3tt1bq_u32",
+            "vsm3tt2aq_u32",
+            "vsm3tt2bq_u32",
+            "vrax1q_u64",
+            "vxarq_u64",
+            "vsha512hq_u64",
+            "vsha512h2q_u64",
+            "vsha512su0q_u64",
+            "vsha512su1q_u64",
+            "vrnd32x_f32",
+            "vrnd32xq_f32",
+            "vrnd32z_f32",
+            "vrnd32zq_f32",
+            "vrnd64x_f32",
+            "vrnd64xq_f32",
+            "vrnd64z_f32",
+            "vrnd64zq_f32",
+            "vcls_u8",
+            "vcls_u16",
+            "vcls_u32",
+            "vclsq_u8",
+            "vclsq_u16",
+            "vclsq_u32",
+            "vtst_p16",
+            "vtstq_p16",
+            "__dbg",
+        ];
+        let arm = match map.get(rust.name) {
+            Some(i) => i,
+            None => {
+                // Skip all these intrinsics as they're not listed in NEON
+                // descriptions online.
+                //
+                // TODO: we still need to verify these intrinsics or find a
+                // reference for them, need to figure out where though!
+                if !rust.file.ends_with("dsp.rs\"")
+                    && !rust.file.ends_with("simd32.rs\"")
+                    && !rust.file.ends_with("cmsis.rs\"")
+                    && !rust.file.ends_with("v6.rs\"")
+                    && !rust.file.ends_with("v7.rs\"")
+                    && !rust.file.ends_with("v8.rs\"")
+                    && !rust.file.ends_with("tme.rs\"")
+                    && !rust.file.ends_with("ex.rs\"")
+                    && !skip_intrinsic_verify.contains(&rust.name)
+                {
+                    println!(
+                        "missing arm definition for {:?} in {}",
+                        rust.name, rust.file
+                    );
+                    all_valid = false;
+                }
+                continue;
+            }
+        };
+
+        if let Err(e) = matches(rust, arm) {
+            println!("failed to verify `{}`", rust.name);
+            println!("  * {}", e);
+            all_valid = false;
+        }
+    }
+    assert!(all_valid);
+}
+
+fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> {
+    if rust.ret != arm.ret.as_ref() {
+        bail!("mismatched return value")
+    }
+    if rust.arguments.len() != arm.arguments.len() {
+        bail!("mismatched argument lengths");
+    }
+
+    // The lengths are equal (checked above), so indexing `arm.arguments` is safe.
+    let mut const_args = 0;
+    for (idx, rust_ty) in rust.arguments.iter().enumerate() {
+        let (arm_ty, arm_const) = &arm.arguments[idx];
+        if *rust_ty != arm_ty {
+            bail!("mismatched arguments")
+        }
+        if !*arm_const {
+            continue;
+        }
+        const_args += 1;
+        if !rust.required_const.contains(&idx) {
+            bail!("argument const mismatch");
+        }
+    }
+    if const_args != rust.required_const.len() {
+        bail!("wrong number of const arguments");
+    }
+
+    if rust.instrs.is_empty() {
+        bail!(
+            "instruction not listed for `{}`, but arm lists {:?}",
+            rust.name,
+            arm.instruction
+        );
+    } else if false
+    /* not super reliable, but can be used to manually check */
+    {
+        for instr in rust.instrs {
+            // Accept an exact prefix match. For a `v`-prefixed name also accept
+            // arm saying `foo` where disassemblers say `vfoo`, or arm saying
+            // `sfoo`/`ffoo` where disassemblers say `vfoo`.
+            let listed = arm.instruction.starts_with(instr)
+                || (instr.starts_with('v')
+                    && (arm.instruction.starts_with(&instr[1..])
+                        || arm.instruction[1..].starts_with(&instr[1..])));
+            if !listed {
+                bail!(
+                    "arm failed to list `{}` as an instruction for `{}` in {:?}",
+                    instr,
+                    rust.name,
+                    arm.instruction,
+                );
+            }
+        }
+    }
+
+    // TODO: verify `target_feature`.
+
+    Ok(())
+}
+
+fn find_accordion(node: &Rc<Node>) -> Option<Rc<Node>> {
+    // Pre-order search: test this node first, then each child subtree in order.
+    if let NodeData::Element { attrs, .. } = &node.data {
+        let is_accordion = attrs.borrow().iter().any(|attr| {
+            attr.name.local.eq_str_ignore_ascii_case("class")
+                && attr.value.to_string() == "intrinsic-accordion"
+        });
+        if is_accordion {
+            return Some(node.clone());
+        }
+    }
+
+    node.children
+        .borrow()
+        .iter()
+        .find_map(|child| find_accordion(child))
+}
+
+#[derive(PartialEq)]
+struct Intrinsic {
+    name: String, // taken from the `for` attribute of the intrinsic's <label>
+    ret: Option<Type>, // `None` when the documented return type is `void`
+    arguments: Vec<(Type, bool)>, // (type, whether the argument is declared `const`)
+    instruction: String, // trimmed, lowercased text found after the " Instruction" header
+}
+
+fn parse_intrinsics(node: &Rc<Node>) -> HashMap<String, Intrinsic> {
+    // Key each element child by intrinsic name; on a repeated name the later one wins.
+    node.children
+        .borrow()
+        .iter()
+        .filter(|child| matches!(child.data, NodeData::Element { .. }))
+        .map(|child| parse_intrinsic(child))
+        .map(|intrinsic| (intrinsic.name.clone(), intrinsic))
+        .collect()
+}
+
+fn parse_intrinsic(node: &Rc<Node>) -> Intrinsic {
+    // Expected shape of `node`:
+    // <div class='intrinsic'>
+    //  <input>...</input>
+    //  <label for=$name>
+    //    <div>
+    //      $signature...
+    //  <article>
+    //    ...
+    let children = node.children.borrow();
+    let mut children = children
+        .iter()
+        .filter(|node| matches!(node.data, NodeData::Element { .. })); // element children only
+    let _input = children.next().expect("no <input>");
+    let label = children.next().expect("no <label>");
+    let article = children.next().expect("no <article>");
+    assert!(children.next().is_none()); // exactly three element children
+
+    // The intrinsic's name is the value of the first `for="..."` attribute on `<label>`
+    let name = match &label.data {
+        NodeData::Element { attrs, .. } => attrs
+            .borrow()
+            .iter()
+            .filter(|attr| attr.name.local.eq_str_ignore_ascii_case("for"))
+            .map(|attr| attr.value.to_string())
+            .next()
+            .expect("no `for` attribute"),
+        _ => panic!(), // `label` was selected as an element above
+    };
+
+    // The signature is the text sitting directly inside the only `<div>` of `<label>`
+    let label_children = label.children.borrow();
+    let mut label_children = label_children
+        .iter()
+        .filter(|node| matches!(node.data, NodeData::Element { .. }));
+    let label_div = label_children.next().expect("no <div> in <label>");
+    assert!(label_children.next().is_none()); // the <div> is the only element child
+    let text = label_div.children.borrow();
+    let mut text = text.iter().filter_map(|node| match &node.data {
+        NodeData::Text { contents } => Some(contents.borrow().to_string()),
+        _ => None,
+    });
+    let ret = text.next().unwrap(); // first text node: the return type
+    let ret = ret.trim();
+    let args = text.next().unwrap(); // second text node: "(Type name, ...)"
+    let args = args.trim();
+    assert!(text.next().is_none()); // exactly two text nodes
+
+    // The instruction is the text of the element that follows the " Instruction" heading
+    let article_children = article.children.borrow();
+    let mut article_children = article_children
+        .iter()
+        .filter(|node| matches!(node.data, NodeData::Element { .. }));
+    let mut instruction = None;
+    while let Some(child) = article_children.next() {
+        let mut header = String::new();
+        collect_text(&mut header, child);
+        if !header.ends_with(" Instruction") {
+            continue; // not the instruction heading
+        }
+        let next = article_children.next().expect("no next child"); // element after the heading
+        assert!(instruction.is_none()); // at most one instruction section
+        let mut instr = String::new();
+        collect_text(&mut instr, &next);
+        instruction = Some(instr);
+    }
+
+    let instruction = match instruction {
+        Some(s) => s.trim().to_lowercase(),
+        None => panic!("can't find instruction for `{}`", name),
+    };
+
+    Intrinsic {
+        name,
+        ret: if ret == "void" {
+            None
+        } else {
+            Some(parse_ty(ret))
+        },
+        instruction,
+        arguments: args // "(...)"
+            .trim_start_matches('(') // "...)"
+            .trim_end_matches(')') // "..."
+            .split(',') // " Type name ", ".."
+            .map(|s| s.trim()) // "Type name"
+            .map(|s| s.rsplitn(2, ' ').nth(1).unwrap()) // "Type"; panics if there is no space
+            .map(|s| {
+                let const_ = "const "; // a leading `const ` flags the argument as const
+                if s.starts_with(const_) {
+                    (parse_ty(&s[const_.len()..]), true)
+                } else {
+                    (parse_ty(s), false)
+                }
+            })
+            .collect(),
+    }
+}
+
+fn parse_ty(s: &str) -> Type {
+    let pointee = |suffix: &str| parse_ty_base(&s[..s.len() - suffix.len()]);
+    if s.ends_with(" const *") {
+        Type::ConstPtr(pointee(" const *")) // `T const *`
+    } else if s.ends_with(" *") {
+        Type::MutPtr(pointee(" *")) // `T *`
+    } else {
+        *parse_ty_base(s) // plain (non-pointer) type
+    }
+}
+
+fn parse_ty_base(s: &str) -> &'static Type { // maps a C type name to its static `Type`
+    match s {
+        "float16_t" => &F16,
+        "float16x4_t" => &F16X4,
+        "float16x4x2_t" => &F16X4X2,
+        "float16x4x3_t" => &F16X4X3,
+        "float16x4x4_t" => &F16X4X4,
+        "float16x8_t" => &F16X8,
+        "float16x8x2_t" => &F16X8X2,
+        "float16x8x3_t" => &F16X8X3,
+        "float16x8x4_t" => &F16X8X4,
+        "float32_t" => &F32,
+        "float32x2_t" => &F32X2,
+        "float32x2x2_t" => &F32X2X2,
+        "float32x2x3_t" => &F32X2X3,
+        "float32x2x4_t" => &F32X2X4,
+        "float32x4_t" => &F32X4,
+        "float32x4x2_t" => &F32X4X2,
+        "float32x4x3_t" => &F32X4X3,
+        "float32x4x4_t" => &F32X4X4,
+        "float64_t" => &F64,
+        "float64x1_t" => &F64X1,
+        "float64x1x2_t" => &F64X1X2,
+        "float64x1x3_t" => &F64X1X3,
+        "float64x1x4_t" => &F64X1X4,
+        "float64x2_t" => &F64X2,
+        "float64x2x2_t" => &F64X2X2,
+        "float64x2x3_t" => &F64X2X3,
+        "float64x2x4_t" => &F64X2X4,
+        "int16_t" => &I16,
+        "int16x2_t" => &I16X2,
+        "int16x4_t" => &I16X4,
+        "int16x4x2_t" => &I16X4X2,
+        "int16x4x3_t" => &I16X4X3,
+        "int16x4x4_t" => &I16X4X4,
+        "int16x8_t" => &I16X8,
+        "int16x8x2_t" => &I16X8X2,
+        "int16x8x3_t" => &I16X8X3,
+        "int16x8x4_t" => &I16X8X4,
+        "int32_t" | "int" => &I32, // plain C `int` maps to the same type as `int32_t`
+        "int32x2_t" => &I32X2,
+        "int32x2x2_t" => &I32X2X2,
+        "int32x2x3_t" => &I32X2X3,
+        "int32x2x4_t" => &I32X2X4,
+        "int32x4_t" => &I32X4,
+        "int32x4x2_t" => &I32X4X2,
+        "int32x4x3_t" => &I32X4X3,
+        "int32x4x4_t" => &I32X4X4,
+        "int64_t" => &I64,
+        "int64x1_t" => &I64X1,
+        "int64x1x2_t" => &I64X1X2,
+        "int64x1x3_t" => &I64X1X3,
+        "int64x1x4_t" => &I64X1X4,
+        "int64x2_t" => &I64X2,
+        "int64x2x2_t" => &I64X2X2,
+        "int64x2x3_t" => &I64X2X3,
+        "int64x2x4_t" => &I64X2X4,
+        "int8_t" => &I8,
+        "int8x16_t" => &I8X16,
+        "int8x16x2_t" => &I8X16X2,
+        "int8x16x3_t" => &I8X16X3,
+        "int8x16x4_t" => &I8X16X4,
+        "int8x4_t" => &I8X4,
+        "int8x8_t" => &I8X8,
+        "int8x8x2_t" => &I8X8X2,
+        "int8x8x3_t" => &I8X8X3,
+        "int8x8x4_t" => &I8X8X4,
+        "poly128_t" => &P128,
+        "poly16_t" => &P16,
+        "poly16x4_t" => &POLY16X4,
+        "poly16x4x2_t" => &P16X4X2,
+        "poly16x4x3_t" => &P16X4X3,
+        "poly16x4x4_t" => &P16X4X4,
+        "poly16x8_t" => &POLY16X8,
+        "poly16x8x2_t" => &P16X8X2,
+        "poly16x8x3_t" => &P16X8X3,
+        "poly16x8x4_t" => &P16X8X4,
+        "poly64_t" => &P64,
+        "poly64x1_t" => &POLY64X1,
+        "poly64x1x2_t" => &P64X1X2,
+        "poly64x1x3_t" => &P64X1X3,
+        "poly64x1x4_t" => &P64X1X4,
+        "poly64x2_t" => &POLY64X2,
+        "poly64x2x2_t" => &P64X2X2,
+        "poly64x2x3_t" => &P64X2X3,
+        "poly64x2x4_t" => &P64X2X4,
+        "poly8_t" => &P8,
+        "poly8x16_t" => &POLY8X16,
+        "poly8x16x2_t" => &POLY8X16X2,
+        "poly8x16x3_t" => &POLY8X16X3,
+        "poly8x16x4_t" => &POLY8X16X4,
+        "poly8x8_t" => &POLY8X8,
+        "poly8x8x2_t" => &POLY8X8X2,
+        "poly8x8x3_t" => &POLY8X8X3,
+        "poly8x8x4_t" => &POLY8X8X4,
+        "uint16_t" => &U16,
+        "uint16x4_t" => &U16X4,
+        "uint16x4x2_t" => &U16X4X2,
+        "uint16x4x3_t" => &U16X4X3,
+        "uint16x4x4_t" => &U16X4X4,
+        "uint16x8_t" => &U16X8,
+        "uint16x8x2_t" => &U16X8X2,
+        "uint16x8x3_t" => &U16X8X3,
+        "uint16x8x4_t" => &U16X8X4,
+        "uint32_t" => &U32,
+        "uint32x2_t" => &U32X2,
+        "uint32x2x2_t" => &U32X2X2,
+        "uint32x2x3_t" => &U32X2X3,
+        "uint32x2x4_t" => &U32X2X4,
+        "uint32x4_t" => &U32X4,
+        "uint32x4x2_t" => &U32X4X2,
+        "uint32x4x3_t" => &U32X4X3,
+        "uint32x4x4_t" => &U32X4X4,
+        "uint64_t" => &U64,
+        "uint64x1_t" => &U64X1,
+        "uint64x1x2_t" => &U64X1X2,
+        "uint64x1x3_t" => &U64X1X3,
+        "uint64x1x4_t" => &U64X1X4,
+        "uint64x2_t" => &U64X2,
+        "uint64x2x2_t" => &U64X2X2,
+        "uint64x2x3_t" => &U64X2X3,
+        "uint64x2x4_t" => &U64X2X4,
+        "uint8_t" => &U8,
+        "uint8x16_t" => &U8X16,
+        "uint8x16x2_t" => &U8X16X2,
+        "uint8x16x3_t" => &U8X16X3,
+        "uint8x16x4_t" => &U8X16X4,
+        "uint8x8_t" => &U8X8,
+        "uint8x8x2_t" => &U8X8X2,
+        "uint8x8x3_t" => &U8X8X3,
+        "uint8x8x4_t" => &U8X8X4,
+
+        _ => panic!("failed to parse html type {:?}", s), // any unlisted spelling aborts
+    }
+}
+
+fn collect_text(s: &mut String, node: &Node) {
+    // Pre-order walk: append this node's text (space-prefixed), then its children's.
+    if let NodeData::Text { contents } = &node.data {
+        s.push(' ');
+        let text = contents.borrow().to_string();
+        s.push_str(&text);
+    }
+    node.children.borrow().iter().for_each(|child| collect_text(s, child));
+}
diff --git a/library/stdarch/crates/stdarch-verify/tests/mips.rs b/library/stdarch/crates/stdarch-verify/tests/mips.rs
new file mode 100644
index 000000000..1eb86dc29
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/tests/mips.rs
@@ -0,0 +1,366 @@
+//! Verification of MIPS MSA intrinsics
+#![allow(bad_style, unused)]
+
+// The header included below (`mips-msa.h`) was obtained from
+// https://gcc.gnu.org/onlinedocs/gcc/MIPS-SIMD-Architecture-Built-in-Functions.html
+static HEADER: &str = include_str!("../mips-msa.h");
+
+stdarch_verify::mips_functions!(static FUNCTIONS);
+
+struct Function {
+    name: &'static str, // intrinsic name; used to look up the matching C declaration
+    arguments: &'static [&'static Type], // compared position by position with the C types
+    ret: Option<&'static Type>, // `None` is only accepted when the C return type is `void`
+    target_feature: Option<&'static str>, // must be `Some("msa")` to pass verification
+    instrs: &'static [&'static str], // only the first entry is compared with the C-derived one
+    file: &'static str, // printed when no C declaration is found for `name`
+    required_const: &'static [usize], // indices of the arguments matched with `imm*` C types
+    has_test: bool, // when false and `name` is not in the skip list, a missing test is reported
+}
+
+static F16: Type = Type::PrimFloat(16); // scalars: the payload is the width in the name
+static F32: Type = Type::PrimFloat(32);
+static F64: Type = Type::PrimFloat(64);
+static I8: Type = Type::PrimSigned(8);
+static I16: Type = Type::PrimSigned(16);
+static I32: Type = Type::PrimSigned(32);
+static I64: Type = Type::PrimSigned(64);
+static U8: Type = Type::PrimUnsigned(8);
+static U16: Type = Type::PrimUnsigned(16);
+static U32: Type = Type::PrimUnsigned(32);
+static U64: Type = Type::PrimUnsigned(64);
+static NEVER: Type = Type::Never;
+static TUPLE: Type = Type::Tuple;
+static v16i8: Type = Type::I(8, 16, 1); // vectors: `vNiM` is stored as (M, N, 1)
+static v8i16: Type = Type::I(16, 8, 1);
+static v4i32: Type = Type::I(32, 4, 1);
+static v2i64: Type = Type::I(64, 2, 1);
+static v16u8: Type = Type::U(8, 16, 1);
+static v8u16: Type = Type::U(16, 8, 1);
+static v4u32: Type = Type::U(32, 4, 1);
+static v2u64: Type = Type::U(64, 2, 1);
+static v8f16: Type = Type::F(16, 8, 1);
+static v4f32: Type = Type::F(32, 4, 1);
+static v2f64: Type = Type::F(64, 2, 1);
+
+#[derive(Debug, Copy, Clone, PartialEq)]
+enum Type {
+    PrimFloat(u8), // payload is 16/32/64 for the F16/F32/F64 statics
+    PrimSigned(u8),
+    PrimUnsigned(u8),
+    PrimPoly(u8),
+    MutPtr(&'static Type), // `MutPtr(&U8)` is what a C `void *` argument must map to
+    ConstPtr(&'static Type),
+    Tuple,
+    I(u8, u8, u8), // vectors: the `vNiM` statics use (M, N, 1)
+    U(u8, u8, u8),
+    P(u8, u8, u8),
+    F(u8, u8, u8),
+    Never,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[allow(non_camel_case_types)]
+enum MsaTy {
+    v16i8, // vector types, named exactly as they are spelled in the C header
+    v8i16,
+    v4i32,
+    v2i64,
+    v16u8,
+    v8u16,
+    v4u32,
+    v2u64,
+    v8f16,
+    v4f32,
+    v2f64,
+    imm0_1, // `imm*`: immediates; the Rust argument must be `i32` and a required const
+    imm0_3,
+    imm0_7,
+    imm0_15,
+    imm0_31,
+    imm0_63,
+    imm0_255,
+    imm_n16_15,
+    imm_n512_511,
+    imm_n1024_1022,
+    imm_n2048_2044,
+    imm_n4096_4088,
+    i32, // plain scalar arguments
+    u32,
+    i64,
+    u64,
+    Void, // parsed from "void"
+    MutVoidPtr, // parsed from "void *"
+}
+
+impl<'a> From<&'a str> for MsaTy {
+    fn from(s: &'a str) -> MsaTy { // exact-match lookup of a C type spelling
+        match s {
+            "v16i8" => MsaTy::v16i8,
+            "v8i16" => MsaTy::v8i16,
+            "v4i32" => MsaTy::v4i32,
+            "v2i64" => MsaTy::v2i64,
+            "v16u8" => MsaTy::v16u8,
+            "v8u16" => MsaTy::v8u16,
+            "v4u32" => MsaTy::v4u32,
+            "v2u64" => MsaTy::v2u64,
+            "v8f16" => MsaTy::v8f16,
+            "v4f32" => MsaTy::v4f32,
+            "v2f64" => MsaTy::v2f64,
+            "imm0_1" => MsaTy::imm0_1,
+            "imm0_3" => MsaTy::imm0_3,
+            "imm0_7" => MsaTy::imm0_7,
+            "imm0_15" => MsaTy::imm0_15,
+            "imm0_31" => MsaTy::imm0_31,
+            "imm0_63" => MsaTy::imm0_63,
+            "imm0_255" => MsaTy::imm0_255,
+            "imm_n16_15" => MsaTy::imm_n16_15,
+            "imm_n512_511" => MsaTy::imm_n512_511,
+            "imm_n1024_1022" => MsaTy::imm_n1024_1022,
+            "imm_n2048_2044" => MsaTy::imm_n2048_2044,
+            "imm_n4096_4088" => MsaTy::imm_n4096_4088,
+            "i32" => MsaTy::i32,
+            "u32" => MsaTy::u32,
+            "i64" => MsaTy::i64,
+            "u64" => MsaTy::u64,
+            "void" => MsaTy::Void,
+            "void *" => MsaTy::MutVoidPtr, // only this exact spelling (one space) is recognised
+            v => panic!("unknown ty: \"{}\"", v), // any other spelling aborts instead of erroring
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+struct MsaIntrinsic {
+    id: String, // C name with the leading `__builtin` replaced by `_`
+    arg_tys: Vec<MsaTy>, // one entry per comma-separated item between the parentheses
+    ret_ty: MsaTy, // parsed from the text before the first whitespace of the declaration
+    instruction: String, // `id` minus its first 6 bytes; underscores after the first become `.`
+}
+
+struct NoneError; // signals a line without the expected whitespace or parentheses
+
+impl std::convert::TryFrom<&'static str> for MsaIntrinsic {
+    // Every non-empty header line is a C declaration shaped like:
+    // $ret_ty __builtin_${fn_id}($($arg_ty),*);
+    type Error = NoneError;
+    fn try_from(line: &'static str) -> Result<Self, Self::Error> {
+        return parse(line).ok_or(NoneError);
+
+        fn parse(line: &'static str) -> Option<MsaIntrinsic> {
+            // The return type is everything before the first whitespace.
+            let first_whitespace = line.find(char::is_whitespace)?;
+            let ret_ty = MsaTy::from(&line[..first_whitespace]);
+
+            // The name sits between that whitespace and the opening parenthesis.
+            let first_parentheses = line.find('(')?;
+            assert!(first_parentheses > first_whitespace);
+            let id = line[first_whitespace + 1..first_parentheses].trim();
+            assert!(id.starts_with("__builtin"));
+            // Swap the `__builtin` prefix for a single `_`.
+            let id = format!("_{}", &id["__builtin".len()..]);
+
+            // Argument types are the comma-separated items up to the first `)`.
+            let close = line.find(')')?;
+            let arg_tys: Vec<MsaTy> = line[first_parentheses + 1..close]
+                .split(',')
+                .map(|arg| MsaTy::from(arg.trim()))
+                .collect();
+
+            // The instruction is the intrinsic name without the __msa_ prefix,
+            // with all underscores but the first one replaced with a `.`
+            let mnemonic = &id[6..];
+            let instruction = match mnemonic.find('_') {
+                Some(first) => {
+                    let (head, tail) = mnemonic.split_at(first + 1);
+                    format!("{}{}", head, tail.replace('_', "."))
+                }
+                None => mnemonic.to_string(),
+            };
+
+            Some(MsaIntrinsic {
+                id,
+                ret_ty,
+                arg_tys,
+                instruction,
+            })
+        }
+    }
+}
+
+/// Checks every Rust MSA intrinsic in `FUNCTIONS` against the declarations
+/// parsed from `HEADER`.
+#[test]
+fn verify_all_signatures() {
+    use std::convert::TryFrom;
+
+    // Index every declaration of the C header by intrinsic name; a line that
+    // cannot be parsed, or a name that shows up twice, aborts the test.
+    let mut intrinsics = std::collections::HashMap::<String, MsaIntrinsic>::new();
+    for line in HEADER.lines().filter(|l| !l.is_empty()) {
+        let intrinsic = match MsaIntrinsic::try_from(line) {
+            Ok(parsed) => parsed,
+            Err(_) => panic!("failed to parse line: \"{}\"", line),
+        };
+        assert!(!intrinsics.contains_key(&intrinsic.id));
+        intrinsics.insert(intrinsic.id.clone(), intrinsic);
+    }
+
+    // Intrinsics for which a missing run-time test is tolerated.
+    let untested_ok = [
+        "__msa_ceqi_d",
+        "__msa_cfcmsa",
+        "__msa_clei_s_d",
+        "__msa_clti_s_d",
+        "__msa_ctcmsa",
+        "__msa_ldi_d",
+        "__msa_maxi_s_d",
+        "__msa_mini_s_d",
+        "break_",
+    ];
+
+    // Walk the Rust side, recording failures instead of stopping at the first.
+    let mut all_valid = true;
+    for rust in FUNCTIONS {
+        if !rust.has_test && !untested_ok.contains(&rust.name) {
+            // The expected test name drops the intrinsic's leading underscores.
+            println!(
+                "missing run-time test named `test_{}` for `{}`",
+                rust.name.trim_start_matches('_'),
+                rust.name
+            );
+            all_valid = false;
+        }
+
+        // Skip some intrinsics that aren't part of MSA
+        if rust.name == "break_" {
+            continue;
+        }
+
+        // Each remaining Rust intrinsic needs a declaration in the header.
+        let mips = if let Some(found) = intrinsics.get(rust.name) {
+            found
+        } else {
+            eprintln!(
+                "missing mips definition for {:?} in {}",
+                rust.name, rust.file
+            );
+            all_valid = false;
+            continue;
+        };
+
+        // Finally compare the two signatures in detail.
+        if let Err(e) = matches(rust, mips) {
+            println!("failed to verify `{}`", rust.name);
+            println!("  * {}", e);
+            all_valid = false;
+        }
+    }
+    // Fail once, after every problem has been printed.
+    assert!(all_valid);
+}
+
+/// Compares one Rust intrinsic with its C declaration: return type (void
+/// only), argument count and types, const arguments, target feature and the
+/// first asserted instruction. The first mismatch is returned as `Err`.
+fn matches(rust: &Function, mips: &MsaIntrinsic) -> Result<(), String> {
+    macro_rules! bail {
+        ($($t:tt)*) => (return Err(format!($($t)*)))
+    }
+
+    // A Rust function without a return type is only valid for a `void` intrinsic.
+    if rust.ret.is_none() && mips.ret_ty != MsaTy::Void {
+        bail!("mismatched return value")
+    }
+
+    // Arity first, so the pairwise comparison below covers every argument.
+    if rust.arguments.len() != mips.arg_tys.len() {
+        bail!("mismatched argument lengths");
+    }
+
+    let mut nconst = 0;
+    for (i, (rust_arg, mips_arg)) in rust.arguments.iter().zip(mips.arg_tys.iter()).enumerate() {
+        // Immediate operands: `i32` on the Rust side, and required to be const.
+        let is_const = matches!(
+            mips_arg,
+            MsaTy::imm0_1
+                | MsaTy::imm0_3
+                | MsaTy::imm0_7
+                | MsaTy::imm0_15
+                | MsaTy::imm0_31
+                | MsaTy::imm0_63
+                | MsaTy::imm0_255
+                | MsaTy::imm_n16_15
+                | MsaTy::imm_n512_511
+                | MsaTy::imm_n1024_1022
+                | MsaTy::imm_n2048_2044
+                | MsaTy::imm_n4096_4088
+        );
+
+        // The Rust type each C argument type has to map to; `None` marks C
+        // types that are never accepted in argument position.
+        let expected = match mips_arg {
+            _ if is_const => Some(I32),
+            MsaTy::v16i8 => Some(v16i8),
+            MsaTy::v8i16 => Some(v8i16),
+            MsaTy::v4i32 => Some(v4i32),
+            MsaTy::v2i64 => Some(v2i64),
+            MsaTy::v16u8 => Some(v16u8),
+            MsaTy::v8u16 => Some(v8u16),
+            MsaTy::v4u32 => Some(v4u32),
+            MsaTy::v2u64 => Some(v2u64),
+            MsaTy::v4f32 => Some(v4f32),
+            MsaTy::v2f64 => Some(v2f64),
+            MsaTy::i32 => Some(I32),
+            MsaTy::i64 => Some(I64),
+            MsaTy::u32 => Some(U32),
+            MsaTy::u64 => Some(U64),
+            MsaTy::MutVoidPtr => Some(Type::MutPtr(&U8)),
+            _ => None,
+        };
+        if expected != Some(**rust_arg) {
+            bail!(
+                "mismatched argument \"{}\"= \"{:?}\" != \"{:?}\"",
+                i,
+                mips_arg,
+                *rust_arg
+            );
+        }
+
+        if is_const {
+            nconst += 1;
+            if !rust.required_const.contains(&i) {
+                bail!("argument const mismatch");
+            }
+        }
+    }
+
+    // Rust must not require more const arguments than the C side has immediates.
+    if nconst != rust.required_const.len() {
+        bail!("wrong number of const arguments");
+    }
+
+    // Every intrinsic checked here has to be gated on the `msa` feature.
+    if rust.target_feature != Some("msa") {
+        bail!("wrong target_feature");
+    }
+
+    let first_instr = match rust.instrs.first() {
+        Some(instr) => *instr,
+        None => bail!(
+            "missing assert_instr for \"{}\" (should be \"{}\")",
+            mips.id,
+            mips.instruction
+        ),
+    };
+    // Normalize slightly to get rid of assembler differences
+    if first_instr.replace(".", "_") != mips.instruction.replace(".", "_") {
+        bail!(
+            "wrong instruction: \"{}\" != \"{}\"",
+            first_instr,
+            mips.instruction
+        );
+    }
+
+    Ok(())
+}
diff --git a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
new file mode 100644
index 000000000..89494bfd2
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
@@ -0,0 +1,841 @@
+#![allow(bad_style)]
+#![allow(unused)]
+#![allow(
+    clippy::shadow_reuse,
+    clippy::cast_lossless,
+    clippy::match_same_arms,
+    clippy::nonminimal_bool,
+    clippy::print_stdout,
+    clippy::use_debug,
+    clippy::eq_op,
+    clippy::useless_format
+)]
+
+use std::collections::{BTreeMap, HashMap};
+
+use serde::Deserialize;
+
+const PRINT_INSTRUCTION_VIOLATIONS: bool = false;
+const PRINT_MISSING_LISTS: bool = false;
+const PRINT_MISSING_LISTS_MARKDOWN: bool = false;
+
+struct Function { // NOTE(review): presumably the element type of `FUNCTIONS` — confirm in the macro
+    name: &'static str,
+    arguments: &'static [&'static Type], // references to the `Type` statics declared below
+    ret: Option<&'static Type>,
+    target_feature: Option<&'static str>,
+    instrs: &'static [&'static str],
+    file: &'static str,
+    required_const: &'static [usize], // presumably indices into `arguments` — TODO confirm
+    has_test: bool,
+}
+
+static F32: Type = Type::PrimFloat(32); // scalars: the payload is the width in the name
+static F64: Type = Type::PrimFloat(64);
+static I8: Type = Type::PrimSigned(8);
+static I16: Type = Type::PrimSigned(16);
+static I32: Type = Type::PrimSigned(32);
+static I64: Type = Type::PrimSigned(64);
+static U8: Type = Type::PrimUnsigned(8);
+static U16: Type = Type::PrimUnsigned(16);
+static U32: Type = Type::PrimUnsigned(32);
+static U64: Type = Type::PrimUnsigned(64);
+static U128: Type = Type::PrimUnsigned(128);
+static ORDERING: Type = Type::Ordering;
+
+static M64: Type = Type::M64; // from here on: one static per unit-like `Type` variant
+static M128: Type = Type::M128;
+static M128BH: Type = Type::M128BH;
+static M128I: Type = Type::M128I;
+static M128D: Type = Type::M128D;
+static M256: Type = Type::M256;
+static M256BH: Type = Type::M256BH;
+static M256I: Type = Type::M256I;
+static M256D: Type = Type::M256D;
+static M512: Type = Type::M512;
+static M512BH: Type = Type::M512BH;
+static M512I: Type = Type::M512I;
+static M512D: Type = Type::M512D;
+static MMASK8: Type = Type::MMASK8;
+static MMASK16: Type = Type::MMASK16;
+static MMASK32: Type = Type::MMASK32;
+static MMASK64: Type = Type::MMASK64;
+static MM_CMPINT_ENUM: Type = Type::MM_CMPINT_ENUM;
+static MM_MANTISSA_NORM_ENUM: Type = Type::MM_MANTISSA_NORM_ENUM;
+static MM_MANTISSA_SIGN_ENUM: Type = Type::MM_MANTISSA_SIGN_ENUM;
+static MM_PERM_ENUM: Type = Type::MM_PERM_ENUM;
+
+static TUPLE: Type = Type::Tuple;
+static CPUID: Type = Type::CpuidResult; // the only static whose name differs from its variant
+static NEVER: Type = Type::Never;
+
+#[derive(Debug)]
+enum Type {
+    PrimFloat(u8), // payload is 32/64 for the F32/F64 statics
+    PrimSigned(u8), // 8/16/32/64 for I8..I64
+    PrimUnsigned(u8), // 8 up to 128 for U8..U128
+    MutPtr(&'static Type), // pointers wrap a reference to another static `Type`
+    ConstPtr(&'static Type),
+    M64, // the variants below carry no payload; each has a matching static above
+    M128,
+    M128BH,
+    M128D,
+    M128I,
+    M256,
+    M256BH,
+    M256D,
+    M256I,
+    M512,
+    M512BH,
+    M512D,
+    M512I,
+    MMASK8,
+    MMASK16,
+    MMASK32,
+    MMASK64,
+    MM_CMPINT_ENUM,
+    MM_MANTISSA_NORM_ENUM,
+    MM_MANTISSA_SIGN_ENUM,
+    MM_PERM_ENUM,
+    Tuple,
+    CpuidResult,
+    Never,
+    Ordering,
+}
+
+// Defines `FUNCTIONS`, the collection of `Function` descriptions that
+// `verify_all_signatures` iterates over.
+// NOTE(review): the exact shape of the expansion lives in the
+// `stdarch_verify` proc-macro crate and is not visible here.
+stdarch_verify::x86_functions!(static FUNCTIONS);
+
+/// Root of the deserialized Intel intrinsics XML document.
+#[derive(Deserialize)]
+struct Data {
+    /// One entry per `<intrinsic>` element; empty when there are none.
+    #[serde(rename = "intrinsic", default)]
+    intrinsics: Vec<Intrinsic>,
+}
+
+/// A single `<intrinsic>` entry from Intel's XML.
+#[derive(Deserialize)]
+struct Intrinsic {
+    /// The `<return>` element (trailing underscore because `return` is a
+    /// Rust keyword).
+    #[serde(rename = "return")]
+    return_: Return,
+    /// Intrinsic name, e.g. `_addcarryx_u32`.
+    name: String,
+    /// Contents of the `<CPUID>` elements; empty when none are listed.
+    #[serde(rename = "CPUID", default)]
+    cpuid: Vec<String>,
+    /// The `<parameter>` elements in document order; empty when none are
+    /// listed.
+    #[serde(rename = "parameter", default)]
+    parameters: Vec<Parameter>,
+    /// The `<instruction>` elements; empty when none are listed.
+    #[serde(default)]
+    instruction: Vec<Instruction>,
+}
+
+/// A `<parameter>` element of an intrinsic.
+#[derive(Deserialize)]
+struct Parameter {
+    /// The C type as spelled by Intel, e.g. `unsigned int *`.
+    #[serde(rename = "type")]
+    type_: String,
+    /// The `etype` attribute, or an empty string when it is absent.
+    /// `equate` treats the value `IMM` as "must be a constant".
+    #[serde(default)]
+    etype: String,
+}
+
+/// The `<return>` element of an intrinsic.
+#[derive(Deserialize)]
+struct Return {
+    /// The C return type as spelled by Intel; `matches` treats both an empty
+    /// string and `void` as "returns nothing".
+    #[serde(rename = "type")]
+    type_: String,
+}
+
+/// An `<instruction>` element of an intrinsic.
+#[derive(Deserialize, Debug)]
+struct Instruction {
+    /// The instruction name as listed by Intel, e.g. `ADCX`.
+    name: String,
+}
+
+// Formats its arguments exactly like `format!` and returns early from the
+// enclosing function with the resulting message wrapped in `Err`.
+macro_rules! bail {
+    ($($arg:tt)*) => {
+        return Err(format!($($arg)*))
+    };
+}
+
+/// Cross-checks every function in `FUNCTIONS` against Intel's intrinsics XML.
+///
+/// For each Rust intrinsic this reports a missing run-time test (unless the
+/// name is on the skip list) and then requires at least one Intel definition
+/// of the same name to satisfy `matches`. Failures are printed and collected
+/// so that all of them are visible before the final `assert!`; a Rust
+/// intrinsic with no Intel entry at all panics immediately. Afterwards the
+/// Intel `_mm*` intrinsics that were never matched are grouped by CPUID and
+/// printed when `PRINT_MISSING_LISTS` or `PRINT_MISSING_LISTS_MARKDOWN` is set.
+#[test]
+fn verify_all_signatures() {
+    // Intrinsics that are allowed to lack a run-time test.
+    // FIXME: this list should be almost empty
+    const SKIP_RUNTIME_TEST: &[&str] = &[
+        "__readeflags",
+        "__writeeflags",
+        "_mm_comige_ss",
+        "_mm_cvt_ss2si",
+        "_mm_cvtt_ss2si",
+        "_mm_cvt_si2ss",
+        "_mm_set_ps1",
+        "_mm_load_ps1",
+        "_mm_store_ps1",
+        "_mm_getcsr",
+        "_mm_setcsr",
+        "_MM_GET_EXCEPTION_MASK",
+        "_MM_GET_EXCEPTION_STATE",
+        "_MM_GET_FLUSH_ZERO_MODE",
+        "_MM_GET_ROUNDING_MODE",
+        "_MM_SET_EXCEPTION_MASK",
+        "_MM_SET_EXCEPTION_STATE",
+        "_MM_SET_FLUSH_ZERO_MODE",
+        "_MM_SET_ROUNDING_MODE",
+        "_mm_prefetch",
+        "_mm_undefined_ps",
+        "_m_pmaxsw",
+        "_m_pmaxub",
+        "_m_pminsw",
+        "_m_pminub",
+        "_m_pavgb",
+        "_m_pavgw",
+        "_m_psadbw",
+        "_mm_cvt_pi2ps",
+        "_m_maskmovq",
+        "_m_pextrw",
+        "_m_pinsrw",
+        "_m_pmovmskb",
+        "_m_pshufw",
+        "_mm_cvtt_ps2pi",
+        "_mm_cvt_ps2pi",
+        "__cpuid_count",
+        "__cpuid",
+        "__get_cpuid_max",
+        "_xsave",
+        "_xrstor",
+        "_xsetbv",
+        "_xgetbv",
+        "_xsaveopt",
+        "_xsavec",
+        "_xsaves",
+        "_xrstors",
+        "_mm_bslli_si128",
+        "_mm_bsrli_si128",
+        "_mm_undefined_pd",
+        "_mm_undefined_si128",
+        "_mm_cvtps_ph",
+        "_mm256_cvtps_ph",
+        "_rdtsc",
+        "__rdtscp",
+        "_mm256_castps128_ps256",
+        "_mm256_castpd128_pd256",
+        "_mm256_castsi128_si256",
+        "_mm256_undefined_ps",
+        "_mm256_undefined_pd",
+        "_mm256_undefined_si256",
+        "_bextr2_u32",
+        "_mm_tzcnt_32",
+        "_m_paddb",
+        "_m_paddw",
+        "_m_paddd",
+        "_m_paddsb",
+        "_m_paddsw",
+        "_m_paddusb",
+        "_m_paddusw",
+        "_m_psubb",
+        "_m_psubw",
+        "_m_psubd",
+        "_m_psubsb",
+        "_m_psubsw",
+        "_m_psubusb",
+        "_m_psubusw",
+        "_mm_set_pi16",
+        "_mm_set_pi32",
+        "_mm_set_pi8",
+        "_mm_set1_pi16",
+        "_mm_set1_pi32",
+        "_mm_set1_pi8",
+        "_mm_setr_pi16",
+        "_mm_setr_pi32",
+        "_mm_setr_pi8",
+        "ud2",
+        "_mm_min_epi8",
+        "_mm_min_epi32",
+        "_xbegin",
+        "_xend",
+        "_rdrand16_step",
+        "_rdrand32_step",
+        "_rdseed16_step",
+        "_rdseed32_step",
+        "_fxsave",
+        "_fxrstor",
+        "_t1mskc_u64",
+        "_mm256_shuffle_epi32",
+        "_mm256_bslli_epi128",
+        "_mm256_bsrli_epi128",
+        "_mm256_unpackhi_epi8",
+        "_mm256_unpacklo_epi8",
+        "_mm256_unpackhi_epi16",
+        "_mm256_unpacklo_epi16",
+        "_mm256_unpackhi_epi32",
+        "_mm256_unpacklo_epi32",
+        "_mm256_unpackhi_epi64",
+        "_mm256_unpacklo_epi64",
+        "_xsave64",
+        "_xrstor64",
+        "_xsaveopt64",
+        "_xsavec64",
+        "_xsaves64",
+        "_xrstors64",
+        "_mm_cvtsi64x_si128",
+        "_mm_cvtsi128_si64x",
+        "_mm_cvtsi64x_sd",
+        "cmpxchg16b",
+        "_rdrand64_step",
+        "_rdseed64_step",
+        "_bextr2_u64",
+        "_mm_tzcnt_64",
+        "_fxsave64",
+        "_fxrstor64",
+        "_mm512_undefined_ps",
+        "_mm512_undefined_pd",
+        "_mm512_undefined_epi32",
+        "_mm512_undefined",
+    ];
+
+    // This XML document was downloaded from Intel's site. To update this you
+    // can visit intel's intrinsics guide online documentation:
+    //
+    //   https://software.intel.com/sites/landingpage/IntrinsicsGuide/#
+    //
+    // Open up the network console and you'll see an xml file was downloaded
+    // (named `data-<version>.xml`). That's the file we downloaded here.
+    let xml = include_bytes!("../x86-intel.xml");
+
+    let xml = &xml[..];
+    let data: Data = serde_xml_rs::from_reader(xml).expect("failed to deserialize xml");
+
+    // Intel may list several definitions under one name, so group them.
+    let mut map = HashMap::new();
+    for intrinsic in &data.intrinsics {
+        map.entry(&intrinsic.name[..])
+            .or_insert_with(Vec::new)
+            .push(intrinsic);
+    }
+
+    let mut all_valid = true;
+    'outer: for rust in FUNCTIONS {
+        if !rust.has_test && !SKIP_RUNTIME_TEST.contains(&rust.name) {
+            println!(
+                "missing run-time test named `test_{}` for `{}`",
+                // The expected test name drops the leading underscores.
+                rust.name.trim_start_matches('_'),
+                rust.name
+            );
+            all_valid = false;
+        }
+
+        match rust.name {
+            // These aren't defined by Intel but they're defined by what appears
+            // to be all other compilers. For more information see
+            // rust-lang/stdarch#307, and otherwise these signatures
+            // have all been manually verified.
+            "__readeflags" |
+            "__writeeflags" |
+            "__cpuid_count" |
+            "__cpuid" |
+            "__get_cpuid_max" |
+            // Not listed with intel, but manually verified
+            "cmpxchg16b" |
+            // The UD2 intrinsic is not defined by Intel, but it was agreed on
+            // in the RFC Issue 2512:
+            // https://github.com/rust-lang/rfcs/issues/2512
+            "ud2"
+                => continue,
+            // Intel requires the mask argument for _mm_shuffle_ps to be an
+            // unsigned integer, but all other _mm_shuffle_.. intrinsics
+            // take a signed-integer. This breaks `_MM_SHUFFLE` for
+            // `_mm_shuffle_ps`:
+            "_mm_shuffle_ps" => continue,
+            _ => {}
+        }
+
+        // these are all AMD-specific intrinsics
+        if let Some(feature) = rust.target_feature {
+            if feature.contains("sse4a") || feature.contains("tbm") {
+                continue;
+            }
+        }
+
+        // Removing the entry leaves only unmatched Intel intrinsics in `map`
+        // for the "missing" report below.
+        let intel = match map.remove(rust.name) {
+            Some(i) => i,
+            None => panic!("missing intel definition for {}", rust.name),
+        };
+
+        // One matching Intel definition is enough; otherwise report every
+        // reason the candidates were rejected.
+        let mut errors = Vec::new();
+        for intel in intel {
+            match matches(rust, intel) {
+                Ok(()) => continue 'outer,
+                Err(e) => errors.push(e),
+            }
+        }
+        println!("failed to verify `{}`", rust.name);
+        for error in errors {
+            println!("  * {}", error);
+        }
+        all_valid = false;
+    }
+    assert!(all_valid);
+
+    let mut missing = BTreeMap::new();
+    for (name, intel) in &map {
+        // currently focused mainly on missing SIMD intrinsics, but there's
+        // definitely some other assorted ones that we're missing.
+        if !name.starts_with("_mm") {
+            continue;
+        }
+
+        // we'll get to avx-512 later
+        // let avx512 = intel.iter().any(|i| {
+        //     i.name.starts_with("_mm512") || i.cpuid.iter().any(|c| {
+        //         c.contains("512")
+        //     })
+        // });
+        // if avx512 {
+        //     continue
+        // }
+
+        for intel in intel {
+            missing
+                .entry(&intel.cpuid)
+                .or_insert_with(Vec::new)
+                .push(intel);
+        }
+    }
+
+    // generate a bulleted list of missing intrinsics
+    if PRINT_MISSING_LISTS || PRINT_MISSING_LISTS_MARKDOWN {
+        for (k, v) in missing {
+            if PRINT_MISSING_LISTS_MARKDOWN {
+                println!("\n<details><summary>{:?}</summary><p>\n", k);
+                for intel in v {
+                    let url = format!(
+                        "https://software.intel.com/sites/landingpage\
+                         /IntrinsicsGuide/#text={}&expand=5236",
+                        intel.name
+                    );
+                    println!("  * [ ] [`{}`]({})", intel.name, url);
+                }
+                println!("</p></details>\n");
+            } else {
+                println!("\n{:?}\n", k);
+                for intel in v {
+                    println!("\t{}", intel.name);
+                }
+            }
+        }
+    }
+}
+
+/// Checks one Rust intrinsic against one Intel definition of the same name.
+///
+/// The checks, in order:
+/// 1. every Intel CPUID (after name fix-ups) is contained in the Rust
+///    `target_feature` string,
+/// 2. optionally, instruction lists are compared (diagnostics only, gated by
+///    `PRINT_INSTRUCTION_VIOLATIONS`),
+/// 3. the return type and each argument type equate with Intel's types,
+/// 4. bare 64-bit integer types only appear in files whose path contains
+///    `x86_64`, unless the intrinsic is explicitly exempt.
+///
+/// Returns `Ok(())` when all checks pass, otherwise `Err` with a message
+/// describing the first mismatch. Panics if Intel lists a CPUID that needs
+/// checking but the Rust function has no target feature.
+fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
+    // Verify that all `#[target_feature]` annotations are correct,
+    // ensuring that we've actually enabled the right instruction
+    // set for this intrinsic.
+    match rust.name {
+        "_bswap" | "_bswap64" => {}
+
+        // These don't actually have a target feature unlike their brethren with
+        // the `x` inside the name which requires adx
+        "_addcarry_u32" | "_addcarry_u64" | "_subborrow_u32" | "_subborrow_u64" => {}
+
+        "_bittest"
+        | "_bittestandset"
+        | "_bittestandreset"
+        | "_bittestandcomplement"
+        | "_bittest64"
+        | "_bittestandset64"
+        | "_bittestandreset64"
+        | "_bittestandcomplement64" => {}
+
+        _ => {
+            if intel.cpuid.is_empty() {
+                bail!("missing cpuid for {}", rust.name);
+            }
+        }
+    }
+
+    for cpuid in &intel.cpuid {
+        // The pause intrinsic is in the SSE2 module, but it is backwards
+        // compatible with CPUs without SSE2, and it therefore does not need the
+        // target-feature attribute.
+        if rust.name == "_mm_pause" {
+            continue;
+        }
+        // this is needed by _xsave and probably some related intrinsics,
+        // but let's just skip it for now.
+        if *cpuid == "XSS" {
+            continue;
+        }
+
+        // these flags on the rdtsc/rdtscp intrinsics we don't test for right
+        // now, but we may wish to add these one day!
+        //
+        // For more info see #308
+        if *cpuid == "TSC" || *cpuid == "RDTSCP" {
+            continue;
+        }
+
+        // Rust feature names are lowercase, Intel's CPUID names are not.
+        let cpuid = cpuid
+            .chars()
+            .flat_map(|c| c.to_lowercase())
+            .collect::<String>();
+
+        // Fix mismatching feature names:
+        let fixed_cpuid = match cpuid.as_str() {
+            // The XML file names IFMA as "avx512ifma52", while Rust calls
+            // it "avx512ifma".
+            "avx512ifma52" => String::from("avx512ifma"),
+            // The XML file names BITALG as "avx512_bitalg", while Rust calls
+            // it "avx512bitalg".
+            "avx512_bitalg" => String::from("avx512bitalg"),
+            // The XML file names VBMI as "avx512_vbmi", while Rust calls
+            // it "avx512vbmi".
+            "avx512_vbmi" => String::from("avx512vbmi"),
+            // The XML file names VBMI2 as "avx512_vbmi2", while Rust calls
+            // it "avx512vbmi2".
+            "avx512_vbmi2" => String::from("avx512vbmi2"),
+            // The XML file names VNNI as "avx512_vnni", while Rust calls
+            // it "avx512vnni".
+            "avx512_vnni" => String::from("avx512vnni"),
+            // Some AVX512f intrinsics are also supported by Knight's Corner.
+            // The XML lists them as avx512f/kncni, but we are solely gating
+            // them behind avx512f since we don't have a KNC feature yet.
+            "avx512f/kncni" => String::from("avx512f"),
+            // See: https://github.com/rust-lang/stdarch/issues/738
+            // The intrinsics guide calls `f16c` `fp16c` in disagreement with
+            // Intel's architecture manuals.
+            "fp16c" => String::from("f16c"),
+            // The XML file names BF16 as "avx512_bf16", while Rust calls
+            // it "avx512bf16".
+            "avx512_bf16" => String::from("avx512bf16"),
+            _ => cpuid,
+        };
+
+        let rust_feature = rust
+            .target_feature
+            .unwrap_or_else(|| panic!("no target feature listed for {}", rust.name));
+
+        // Substring check: the Rust attribute may list several features.
+        if rust_feature.contains(&fixed_cpuid) {
+            continue;
+        }
+        bail!(
+            "intel cpuid `{}` not in `{}` for {}",
+            fixed_cpuid,
+            rust_feature,
+            rust.name
+        )
+    }
+
+    if PRINT_INSTRUCTION_VIOLATIONS {
+        if rust.instrs.is_empty() {
+            if !intel.instruction.is_empty() {
+                println!(
+                    "instruction not listed for `{}`, but intel lists {:?}",
+                    rust.name, intel.instruction
+                );
+            }
+
+        // If intel doesn't list any instructions and we do then don't
+        // bother trying to look for instructions in intel, we've just got
+        // some extra assertions on our end.
+        } else if !intel.instruction.is_empty() {
+            for instr in rust.instrs {
+                let asserting = intel.instruction.iter().any(|a| a.name.starts_with(instr));
+                if !asserting {
+                    println!(
+                        "intel failed to list `{}` as an instruction for `{}`",
+                        instr, rust.name
+                    );
+                }
+            }
+        }
+    }
+
+    // Make sure we've got the right return type.
+    if let Some(t) = rust.ret {
+        equate(t, &intel.return_.type_, "", rust.name, false)?;
+    } else if intel.return_.type_ != "" && intel.return_.type_ != "void" {
+        bail!(
+            "{} returns `{}` with intel, void in rust",
+            rust.name,
+            intel.return_.type_
+        )
+    }
+
+    // If there's no arguments on Rust's side intel may list one "void"
+    // argument, so handle that here.
+    if rust.arguments.is_empty() && intel.parameters.len() == 1 {
+        if intel.parameters[0].type_ != "void" {
+            bail!("rust has 0 arguments, intel has one for {}", rust.name)
+        }
+    } else {
+        // Otherwise we want all parameters to be exactly the same
+        if rust.arguments.len() != intel.parameters.len() {
+            bail!("wrong number of arguments on {}", rust.name)
+        }
+        for (i, (a, b)) in intel.parameters.iter().zip(rust.arguments).enumerate() {
+            // `a` is Intel's parameter, `b` the Rust argument type.
+            let is_const = rust.required_const.contains(&i);
+            equate(b, &a.type_, &a.etype, &intel.name, is_const)?;
+        }
+    }
+
+    // Does the Rust signature mention a bare `i64`/`u64` anywhere?
+    let any_i64 = rust
+        .arguments
+        .iter()
+        .cloned()
+        .chain(rust.ret)
+        .any(|arg| matches!(*arg, Type::PrimSigned(64) | Type::PrimUnsigned(64)));
+    let any_i64_exempt = match rust.name {
+        // These intrinsics have all been manually verified against Clang's
+        // headers to be available on x86, and the u64 arguments seem
+        // spurious I guess?
+        "_xsave" | "_xrstor" | "_xsetbv" | "_xgetbv" | "_xsaveopt" | "_xsavec" | "_xsaves"
+        | "_xrstors" => true,
+
+        // Apparently all of clang/msvc/gcc accept these intrinsics on
+        // 32-bit, so let's do the same
+        "_mm_set_epi64x"
+        | "_mm_set1_epi64x"
+        | "_mm256_set_epi64x"
+        | "_mm256_setr_epi64x"
+        | "_mm256_set1_epi64x"
+        | "_mm512_set1_epi64"
+        | "_mm256_mask_set1_epi64"
+        | "_mm256_maskz_set1_epi64"
+        | "_mm_mask_set1_epi64"
+        | "_mm_maskz_set1_epi64"
+        | "_mm512_set4_epi64"
+        | "_mm512_setr4_epi64"
+        | "_mm512_set_epi64"
+        | "_mm512_setr_epi64"
+        | "_mm512_reduce_add_epi64"
+        | "_mm512_mask_reduce_add_epi64"
+        | "_mm512_reduce_mul_epi64"
+        | "_mm512_mask_reduce_mul_epi64"
+        | "_mm512_reduce_max_epi64"
+        | "_mm512_mask_reduce_max_epi64"
+        | "_mm512_reduce_max_epu64"
+        | "_mm512_mask_reduce_max_epu64"
+        | "_mm512_reduce_min_epi64"
+        | "_mm512_mask_reduce_min_epi64"
+        | "_mm512_reduce_min_epu64"
+        | "_mm512_mask_reduce_min_epu64"
+        | "_mm512_reduce_and_epi64"
+        | "_mm512_mask_reduce_and_epi64"
+        | "_mm512_reduce_or_epi64"
+        | "_mm512_mask_reduce_or_epi64"
+        | "_mm512_mask_set1_epi64"
+        | "_mm512_maskz_set1_epi64"
+        | "_mm_cvt_roundss_si64"
+        | "_mm_cvt_roundss_i64"
+        | "_mm_cvt_roundss_u64"
+        | "_mm_cvtss_i64"
+        | "_mm_cvtss_u64"
+        | "_mm_cvt_roundsd_si64"
+        | "_mm_cvt_roundsd_i64"
+        | "_mm_cvt_roundsd_u64"
+        | "_mm_cvtsd_i64"
+        | "_mm_cvtsd_u64"
+        | "_mm_cvt_roundi64_ss"
+        | "_mm_cvt_roundi64_sd"
+        | "_mm_cvt_roundsi64_ss"
+        | "_mm_cvt_roundsi64_sd"
+        | "_mm_cvt_roundu64_ss"
+        | "_mm_cvt_roundu64_sd"
+        | "_mm_cvti64_ss"
+        | "_mm_cvti64_sd"
+        | "_mm_cvtt_roundss_si64"
+        | "_mm_cvtt_roundss_i64"
+        | "_mm_cvtt_roundss_u64"
+        | "_mm_cvttss_i64"
+        | "_mm_cvttss_u64"
+        | "_mm_cvtt_roundsd_si64"
+        | "_mm_cvtt_roundsd_i64"
+        | "_mm_cvtt_roundsd_u64"
+        | "_mm_cvttsd_i64"
+        | "_mm_cvttsd_u64"
+        | "_mm_cvtu64_ss"
+        | "_mm_cvtu64_sd" => true,
+
+        // These return a 64-bit argument but they're assembled from other
+        // 32-bit registers, so these work on 32-bit just fine. See #308 for
+        // more info.
+        "_rdtsc" | "__rdtscp" => true,
+
+        _ => false,
+    };
+    if any_i64 && !any_i64_exempt && !rust.file.contains("x86_64") {
+        bail!(
+            "intrinsic `{}` uses a 64-bit bare type but may be \
+             available on 32-bit platforms",
+            rust.name
+        )
+    }
+    Ok(())
+}
+
+/// Checks that the Rust type `t` corresponds to Intel's C type string.
+///
+/// * `t` - the Rust-side type of the argument or return value.
+/// * `intel` - the C type as spelled in Intel's XML; pointer and `const`
+///   placement is normalized before comparing.
+/// * `etype` - Intel's `etype` attribute; `"IMM"` marks an immediate.
+/// * `intrinsic` - the intrinsic name, used for special cases and in error
+///   messages.
+/// * `is_const` - whether the Rust side requires this argument to be a
+///   constant.
+///
+/// Returns `Err` with a description when constness disagrees with `etype` or
+/// when no pairing of `t` and the normalized type is known. Panics if a
+/// non-`IMM` parameter is typed `const int`.
+fn equate(
+    t: &Type,
+    intel: &str,
+    etype: &str,
+    intrinsic: &str,
+    is_const: bool,
+) -> Result<(), String> {
+    // Make pointer adjacent to the type: float * foo => float* foo
+    let mut intel = intel.replace(" *", "*");
+    // Make mutability modifier adjacent to the pointer:
+    // float const * foo => float const* foo
+    intel = intel.replace("const *", "const*");
+    // Normalize mutability modifier to after the type:
+    // const float* foo => float const*
+    if intel.starts_with("const") && intel.ends_with('*') {
+        intel = intel.replace("const ", "");
+        intel = intel.replace("*", " const*");
+    }
+    if etype == "IMM" {
+        // The _bittest intrinsics claim to only accept immediates but actually
+        // accept run-time values as well.
+        if !is_const && !intrinsic.starts_with("_bittest") {
+            bail!("argument required to be const but isn't");
+        }
+    } else {
+        // const int must be an IMM
+        assert_ne!(intel, "const int");
+        if is_const {
+            bail!("argument is const but shouldn't be");
+        }
+    }
+    match (t, &intel[..]) {
+        (&Type::PrimFloat(32), "float") => {}
+        (&Type::PrimFloat(64), "double") => {}
+        (&Type::PrimSigned(16), "__int16") => {}
+        (&Type::PrimSigned(16), "short") => {}
+        (&Type::PrimSigned(32), "__int32") => {}
+        (&Type::PrimSigned(32), "const int") => {}
+        (&Type::PrimSigned(32), "int") => {}
+        (&Type::PrimSigned(64), "__int64") => {}
+        (&Type::PrimSigned(64), "long long") => {}
+        (&Type::PrimSigned(8), "__int8") => {}
+        (&Type::PrimSigned(8), "char") => {}
+        (&Type::PrimUnsigned(16), "unsigned short") => {}
+        (&Type::PrimUnsigned(32), "unsigned int") => {}
+        (&Type::PrimUnsigned(32), "const unsigned int") => {}
+        (&Type::PrimUnsigned(64), "unsigned __int64") => {}
+        (&Type::PrimUnsigned(8), "unsigned char") => {}
+        (&Type::M64, "__m64") => {}
+        (&Type::M128, "__m128") => {}
+        (&Type::M128BH, "__m128bh") => {}
+        (&Type::M128I, "__m128i") => {}
+        (&Type::M128D, "__m128d") => {}
+        (&Type::M256, "__m256") => {}
+        (&Type::M256BH, "__m256bh") => {}
+        (&Type::M256I, "__m256i") => {}
+        (&Type::M256D, "__m256d") => {}
+        (&Type::M512, "__m512") => {}
+        (&Type::M512BH, "__m512bh") => {}
+        (&Type::M512I, "__m512i") => {}
+        (&Type::M512D, "__m512d") => {}
+        (&Type::MMASK64, "__mmask64") => {}
+        (&Type::MMASK32, "__mmask32") => {}
+        (&Type::MMASK16, "__mmask16") => {}
+        (&Type::MMASK8, "__mmask8") => {}
+
+        (&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
+        (&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
+        (&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(32)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(16)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(8)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(64)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
+        (&Type::MutPtr(&Type::PrimSigned(8)), "char*") => {}
+        (&Type::MutPtr(&Type::PrimUnsigned(16)), "unsigned short*") => {}
+        (&Type::MutPtr(&Type::PrimUnsigned(32)), "unsigned int*") => {}
+        (&Type::MutPtr(&Type::PrimUnsigned(64)), "unsigned __int64*") => {}
+        (&Type::MutPtr(&Type::PrimUnsigned(8)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimUnsigned(32)), "__mmask32*") => {}
+        (&Type::MutPtr(&Type::PrimUnsigned(64)), "__mmask64*") => {}
+        (&Type::MutPtr(&Type::M64), "__m64*") => {}
+        (&Type::MutPtr(&Type::M128), "__m128*") => {}
+        (&Type::MutPtr(&Type::M128BH), "__m128bh*") => {}
+        (&Type::MutPtr(&Type::M128I), "__m128i*") => {}
+        (&Type::MutPtr(&Type::M128D), "__m128d*") => {}
+        (&Type::MutPtr(&Type::M256), "__m256*") => {}
+        (&Type::MutPtr(&Type::M256BH), "__m256bh*") => {}
+        (&Type::MutPtr(&Type::M256I), "__m256i*") => {}
+        (&Type::MutPtr(&Type::M256D), "__m256d*") => {}
+        (&Type::MutPtr(&Type::M512), "__m512*") => {}
+        (&Type::MutPtr(&Type::M512BH), "__m512bh*") => {}
+        (&Type::MutPtr(&Type::M512I), "__m512i*") => {}
+        (&Type::MutPtr(&Type::M512D), "__m512d*") => {}
+
+        (&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
+        (&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
+        (&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(8)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(16)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(32)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(64)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}
+        (&Type::ConstPtr(&Type::PrimSigned(8)), "char const*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(16)), "unsigned short const*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(32)), "unsigned int const*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(64)), "unsigned __int64 const*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(8)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(32)), "void const*") => {}
+        (&Type::ConstPtr(&Type::M64), "__m64 const*") => {}
+        (&Type::ConstPtr(&Type::M128), "__m128 const*") => {}
+        (&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {}
+        (&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}
+        (&Type::ConstPtr(&Type::M128D), "__m128d const*") => {}
+        (&Type::ConstPtr(&Type::M256), "__m256 const*") => {}
+        (&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {}
+        (&Type::ConstPtr(&Type::M256I), "__m256i const*") => {}
+        (&Type::ConstPtr(&Type::M256D), "__m256d const*") => {}
+        (&Type::ConstPtr(&Type::M512), "__m512 const*") => {}
+        (&Type::ConstPtr(&Type::M512BH), "__m512bh const*") => {}
+        (&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
+        (&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(32)), "__mmask32*") => {}
+        (&Type::ConstPtr(&Type::PrimUnsigned(64)), "__mmask64*") => {}
+
+        (&Type::MM_CMPINT_ENUM, "_MM_CMPINT_ENUM") => {}
+        (&Type::MM_MANTISSA_NORM_ENUM, "_MM_MANTISSA_NORM_ENUM") => {}
+        (&Type::MM_MANTISSA_SIGN_ENUM, "_MM_MANTISSA_SIGN_ENUM") => {}
+        (&Type::MM_PERM_ENUM, "_MM_PERM_ENUM") => {}
+
+        // This is a macro (?) in C which seems to mutate its arguments, but
+        // that means that we're taking pointers to arguments in rust
+        // as we're not exposing it as a macro.
+        (&Type::MutPtr(&Type::M128), "__m128") if intrinsic == "_MM_TRANSPOSE4_PS" => {}
+
+        // The _rdtsc intrinsic uses a __int64 return type, but this is a bug in
+        // the intrinsics guide: https://github.com/rust-lang/stdarch/issues/559
+        // We have manually fixed the bug by changing the return type to `u64`.
+        (&Type::PrimUnsigned(64), "__int64") if intrinsic == "_rdtsc" => {}
+
+        // The _bittest and _bittest64 intrinsics takes a mutable pointer in the
+        // intrinsics guide even though it never writes through the pointer:
+        (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32*") if intrinsic == "_bittest" => {}
+        (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64*") if intrinsic == "_bittest64" => {}
+        // The _xrstor, _fxrstor, _xrstor64, _fxrstor64 intrinsics take a
+        // mutable pointer in the intrinsics guide even though they never write
+        // through the pointer:
+        (&Type::ConstPtr(&Type::PrimUnsigned(8)), "void*")
+            if intrinsic == "_xrstor"
+                || intrinsic == "_xrstor64"
+                || intrinsic == "_fxrstor"
+                || intrinsic == "_fxrstor64" => {}
+
+        _ => bail!(
+            "failed to equate: `{}` and {:?} for {}",
+            intel,
+            t,
+            intrinsic
+        ),
+    }
+    Ok(())
+}
diff --git a/library/stdarch/crates/stdarch-verify/x86-intel.xml b/library/stdarch/crates/stdarch-verify/x86-intel.xml
new file mode 100644
index 000000000..264ecee0e
--- /dev/null
+++ b/library/stdarch/crates/stdarch-verify/x86-intel.xml
@@ -0,0 +1,148137 @@
+<intrinsics_list version="3.5.3" date="06/30/2020">
+<intrinsic tech="Other" name="_addcarryx_u32">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>ADX</CPUID>
+	<category>Arithmetic</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned char" varname="c_in" etype="UI8"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<parameter type="unsigned int *" varname="out" etype="UI32" memwidth="32"/>
+	<description>Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag).</description>
+	<operation>
+tmp[32:0] := a[31:0] + b[31:0] + (c_in &gt; 0 ? 1 : 0)
+MEM[out+31:out] := tmp[31:0]
+dst[0] := tmp[32]
+dst[7:1] := 0
+	</operation>
+	<instruction name="ADCX" form="r32, r32" xed="ADCX_GPR32d_GPR32d"/>
+	<instruction name="ADOX" form="r32, r32" xed="ADOX_GPR32d_GPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_addcarryx_u64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>ADX</CPUID>
+	<category>Arithmetic</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned char" varname="c_in" etype="UI8"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<parameter type="unsigned __int64 *" varname="out" etype="UI64" memwidth="64"/>
+	<description>Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry or overflow flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag).</description>
+	<operation>
+tmp[64:0] := a[63:0] + b[63:0] + (c_in &gt; 0 ? 1 : 0)
+MEM[out+63:out] := tmp[63:0]
+dst[0] := tmp[64]
+dst[7:1] := 0
+	</operation>
+	<instruction name="ADCX" form="r64, r64" xed="ADCX_GPR64q_GPR64q"/>
+	<instruction name="ADOX" form="r64, r64" xed="ADOX_GPR64q_GPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_aesenc_si128">
+	<type>Integer</type>
+	<CPUID>AES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="RoundKey" etype="M128"/>
+	<description>Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".</description>
+	<operation>a[127:0] := ShiftRows(a[127:0])
+a[127:0] := SubBytes(a[127:0])
+a[127:0] := MixColumns(a[127:0])
+dst[127:0] := a[127:0] XOR RoundKey[127:0]
+	</operation>
+	<instruction name="AESENC" form="xmm, xmm" xed="AESENC_XMMdq_XMMdq"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_aesenclast_si128">
+	<type>Integer</type>
+	<CPUID>AES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="RoundKey" etype="M128"/>
+	<description>Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".</description>
+	<operation>a[127:0] := ShiftRows(a[127:0])
+a[127:0] := SubBytes(a[127:0])
+dst[127:0] := a[127:0] XOR RoundKey[127:0]
+	</operation>
+	<instruction name="AESENCLAST" form="xmm, xmm" xed="AESENCLAST_XMMdq_XMMdq"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_aesdec_si128">
+	<type>Integer</type>
+	<CPUID>AES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="RoundKey" etype="M128"/>
+	<description>Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".</description>
+	<operation>a[127:0] := InvShiftRows(a[127:0])
+a[127:0] := InvSubBytes(a[127:0])
+a[127:0] := InvMixColumns(a[127:0])
+dst[127:0] := a[127:0] XOR RoundKey[127:0]
+	</operation>
+	<instruction name="AESDEC" form="xmm, xmm" xed="AESDEC_XMMdq_XMMdq"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_aesdeclast_si128">
+	<type>Integer</type>
+	<CPUID>AES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="RoundKey" etype="M128"/>
+	<description>Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the result in "dst".</description>
+	<operation>a[127:0] := InvShiftRows(a[127:0])
+a[127:0] := InvSubBytes(a[127:0])
+dst[127:0] := a[127:0] XOR RoundKey[127:0]
+	</operation>
+	<instruction name="AESDECLAST" form="xmm, xmm" xed="AESDECLAST_XMMdq_XMMdq"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_aesimc_si128">
+	<type>Integer</type>
+	<CPUID>AES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Perform the InvMixColumns transformation on "a" and store the result in "dst".</description>
+	<operation>dst[127:0] := InvMixColumns(a[127:0])
+	</operation>
+	<instruction name="AESIMC" form="xmm, xmm" xed="AESIMC_XMMdq_XMMdq"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_aeskeygenassist_si128">
+	<type>Integer</type>
+	<CPUID>AES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Assist in expanding the AES cipher key by computing steps towards generating a round key for encryption cipher using data from "a" and an 8-bit round constant specified in "imm8", and store the result in "dst".</description>
+	<operation>X3[31:0] := a[127:96]
+X2[31:0] := a[95:64]
+X1[31:0] := a[63:32]
+X0[31:0] := a[31:0]
+RCON[31:0] := ZeroExtend32(imm8[7:0])
+dst[31:0] := SubWord(X1)
+dst[63:32] := RotWord(SubWord(X1)) XOR RCON
+dst[95:64] := SubWord(X3)
+dst[127:96] := RotWord(SubWord(X3)) XOR RCON
+	</operation>
+	<instruction name="AESKEYGENASSIST" form="xmm, xmm, imm8" xed="AESKEYGENASSIST_XMMdq_XMMdq_IMMb"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_dpbf16ps">
+	<type>Tile</type>
+	<type>Floating Point</type>
+	<CPUID>AMXBF16</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="__tile" varname="a"/>
+	<parameter type="__tile" varname="b"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in tiles "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "dst", and store the 32-bit result back to tile "dst".</description>
+	<operation>FOR m := 0 TO dst.rows - 1
+	tmp := dst.row[m]
+	FOR k := 0 TO (a.colsb / 4) - 1
+		FOR n := 0 TO (dst.colsb / 4) - 1
+			tmp.fp32[n] += FP32(a.row[m].bf16[2*k+0]) * FP32(b.row[k].bf16[2*n+0])
+			tmp.fp32[n] += FP32(a.row[m].bf16[2*k+1]) * FP32(b.row[k].bf16[2*n+1])
+		ENDFOR
+	ENDFOR
+	write_row_and_zero(dst, m, tmp, dst.colsb)
+ENDFOR
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TDPBF16PS" form="tmm, tmm, tmm" xed="TDPBF16PS_TMMf32_TMMu32_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_dpbsud">
+	<type>Tile</type>
+	<CPUID>AMXINT8</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="__tile" varname="a"/>
+	<parameter type="__tile" varname="b"/>
+	<description>Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst".</description>
+	<operation>DEFINE DPBD(c, x, y) {
+	tmp1 := SignExtend32(x.byte[0]) * ZeroExtend32(y.byte[0])
+	tmp2 := SignExtend32(x.byte[1]) * ZeroExtend32(y.byte[1])
+	tmp3 := SignExtend32(x.byte[2]) * ZeroExtend32(y.byte[2])
+	tmp4 := SignExtend32(x.byte[3]) * ZeroExtend32(y.byte[3])
+	
+	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
+}
+FOR m := 0 TO dst.rows - 1
+	tmp := dst.row[m]
+	FOR k := 0 TO (a.colsb / 4) - 1
+		FOR n := 0 TO (dst.colsb / 4) - 1
+			tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n])
+		ENDFOR
+	ENDFOR
+	write_row_and_zero(dst, m, tmp, dst.colsb)
+ENDFOR
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TDPBSUD" form="tmm, tmm, tmm" xed="TDPBSUD_TMMi32_TMMu32_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_dpbusd">
+	<type>Tile</type>
+	<CPUID>AMXINT8</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="__tile" varname="a"/>
+	<parameter type="__tile" varname="b"/>
+	<description>Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst".</description>
+	<operation>DEFINE DPBD(c, x, y) {
+	tmp1 := ZeroExtend32(x.byte[0]) * SignExtend32(y.byte[0])
+	tmp2 := ZeroExtend32(x.byte[1]) * SignExtend32(y.byte[1])
+	tmp3 := ZeroExtend32(x.byte[2]) * SignExtend32(y.byte[2])
+	tmp4 := ZeroExtend32(x.byte[3]) * SignExtend32(y.byte[3])
+	
+	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
+}
+FOR m := 0 TO dst.rows - 1
+	tmp := dst.row[m]
+	FOR k := 0 TO (a.colsb / 4) - 1
+		FOR n := 0 TO (dst.colsb / 4) - 1
+			tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n])
+		ENDFOR
+	ENDFOR
+	write_row_and_zero(dst, m, tmp, dst.colsb)
+ENDFOR
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TDPBUSD" form="tmm, tmm, tmm" xed="TDPBUSD_TMMi32_TMMu32_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_dpbuud">
+	<type>Tile</type>
+	<CPUID>AMXINT8</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="__tile" varname="a"/>
+	<parameter type="__tile" varname="b"/>
+	<description>Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding unsigned 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst".</description>
+	<operation>DEFINE DPBD(c, x, y) {
+	tmp1 := ZeroExtend32(x.byte[0]) * ZeroExtend32(y.byte[0])
+	tmp2 := ZeroExtend32(x.byte[1]) * ZeroExtend32(y.byte[1])
+	tmp3 := ZeroExtend32(x.byte[2]) * ZeroExtend32(y.byte[2])
+	tmp4 := ZeroExtend32(x.byte[3]) * ZeroExtend32(y.byte[3])
+	
+	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
+}
+FOR m := 0 TO dst.rows - 1
+	tmp := dst.row[m]
+	FOR k := 0 TO (a.colsb / 4) - 1
+		FOR n := 0 TO (dst.colsb / 4) - 1
+			tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n])
+		ENDFOR
+	ENDFOR
+	write_row_and_zero(dst, m, tmp, dst.colsb)
+ENDFOR
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TDPBUUD" form="tmm, tmm, tmm" xed="TDPBUUD_TMMu32_TMMu32_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_dpbssd">
+	<type>Tile</type>
+	<CPUID>AMXINT8</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="__tile" varname="a"/>
+	<parameter type="__tile" varname="b"/>
+	<description>Compute dot-product of bytes in tiles with a source/destination accumulator. Multiply groups of 4 adjacent pairs of signed 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate 32-bit results. Sum these 4 results with the corresponding 32-bit integer in "dst", and store the 32-bit result back to tile "dst".</description>
+	<operation>DEFINE DPBD(c, x, y) {
+	tmp1 := SignExtend32(x.byte[0]) * SignExtend32(y.byte[0])
+	tmp2 := SignExtend32(x.byte[1]) * SignExtend32(y.byte[1])
+	tmp3 := SignExtend32(x.byte[2]) * SignExtend32(y.byte[2])
+	tmp4 := SignExtend32(x.byte[3]) * SignExtend32(y.byte[3])
+	
+	RETURN c + tmp1 + tmp2 + tmp3 + tmp4
+}
+FOR m := 0 TO dst.rows - 1
+	tmp := dst.row[m]
+	FOR k := 0 TO (a.colsb / 4) - 1
+		FOR n := 0 TO (dst.colsb / 4) - 1
+			tmp.dword[n] := DPBD(tmp.dword[n], a.row[m].dword[k], b.row[k].dword[n])
+		ENDFOR
+	ENDFOR
+	write_row_and_zero(dst, m, tmp, dst.colsb)
+ENDFOR
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TDPBSSD" form="tmm, tmm, tmm" xed="TDPBSSD_TMMi32_TMMu32_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_loadconfig">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="mem_addr" memwidth="512"/>
+	<description>Load tile configuration from a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type palette, the number of bytes per row, and the number of rows. If the specified palette_id is zero, that signifies the init state for both the tile config and the tile data, and the tiles are zeroed. Any invalid configurations will result in #GP fault.</description>
+	<operation>
+//	format of memory payload. each field is a byte.
+//		 0: palette_id
+//		 1: startRow (8b)
+//	 2-15: reserved (must be zero)
+//	16-17: tile0.colsb -- bytes_per_row
+//	18-19: tile1.colsb
+//	20-21: tile2.colsb
+//			...
+//	46-47: tile15.colsb
+//		48: tile0.rows
+//		49: tile1.rows
+//		50: tile2.rows
+//			 ...
+//		63: tile15.rows
+	</operation>
+	<instruction name="LDTILECFG" form="m512" xed="LDTILECFG_MEM"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_storeconfig">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr" memwidth="512"/>
+	<description>Stores the current tile configuration to a 64-byte memory location specified by "mem_addr". The tile configuration format is specified below, and includes the tile type palette, the number of bytes per row, and the number of rows. If tiles are not configured, all zeroes will be stored to memory.</description>
+	<operation>
+//	format of memory payload. each field is a byte.
+//		 0: palette_id
+//		 1: startRow (8b)
+//	 2-15: reserved (must be zero)
+//	16-17: tile0.colsb -- bytes_per_row
+//	18-19: tile1.colsb
+//	20-21: tile2.colsb
+//			...
+//	46-47: tile15.colsb
+//		48: tile0.rows
+//		49: tile1.rows
+//		50: tile2.rows
+//			 ...
+//		63: tile15.rows
+	</operation>
+	<instruction name="STTILECFG" form="m512" xed="STTILECFG_MEM"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_loadd">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="const void *" varname="base"/>
+	<parameter type="int" varname="stride" etype="UI32"/>
+	<description>Load tile rows from memory specified by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig".</description>
+	<operation>start := tileconfig.startRow
+IF start == 0 // not restarting, zero incoming state
+	tilezero(dst)
+FI
+nbytes := dst.colsb
+DO WHILE start &lt; dst.rows
+	memptr := base + start * stride
+	write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes)
+	start := start + 1
+OD
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TILELOADD" form="tmm, sibmem" xed="TILELOADD_TMMu32_MEMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_stream_loadd">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="dst"/>
+	<parameter type="const void *" varname="base"/>
+	<parameter type="int" varname="stride" etype="UI32"/>
+	<description>Load tile rows from memory specified by "base" address and "stride" into destination tile "dst" using the tile configuration previously configured via "_tile_loadconfig". This intrinsic provides a hint to the implementation that the data will likely not be reused in the near future and the data caching can be optimized accordingly.</description>
+	<operation>start := tileconfig.startRow
+IF start == 0 // not restarting, zero incoming state
+	tilezero(dst)
+FI
+nbytes := dst.colsb
+DO WHILE start &lt; dst.rows
+	memptr := base + start * stride
+	write_row_and_zero(dst, start, read_memory(memptr, nbytes), nbytes)
+	start := start + 1
+OD
+zero_upper_rows(dst, dst.rows)
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TILELOADDT1" form="tmm, sibmem" xed="TILELOADDT1_TMMu32_MEMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_release">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<description>Release the tile configuration to return to the init state, which releases all storage it currently holds.</description>
+	<instruction name="TILERELEASE" xed="TILERELEASE"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_stored">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="src" />
+	<parameter type="void *" varname="base"/>
+	<parameter type="int" varname="stride" etype="UI32"/>
+	<description>Store the tile specified by "src" to memory specified by "base" address and "stride" using the tile configuration previously configured via "_tile_loadconfig".</description>
+	<operation>start := tileconfig.startRow
+DO WHILE start &lt; src.rows
+	memptr := base + start * stride
+	write_memory(memptr, src.colsb, src.row[start])
+	start := start + 1
+OD
+zero_tileconfig_start()
+	</operation>
+	<instruction name="TILESTORED" form="sibmem, tmm" xed="TILESTORED_MEMu32_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AMX" name="_tile_zero">
+	<type>Tile</type>
+	<CPUID>AMXTILE</CPUID>
+	<category>Application-Targeted</category>
+	<return type="void"/>
+	<parameter type="__tile" varname="tdest"/>
+	<description>Zero the tile specified by "tdest".</description>
+	<operation>nbytes := palette_table[tileconfig.palette_id].bytes_per_row
+FOR i := 0 TO palette_table[tileconfig.palette_id].max_rows-1
+	FOR j := 0 TO nbytes-1
+		tdest.row[i].byte[j] := 0
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="TILEZERO" form="tmm" xed="TILEZERO_TMMu32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDPD" form="ymm, ymm, ymm" xed="VADDPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDPS" form="ymm, ymm, ymm" xed="VADDPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_addsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Alternately add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF ((j &amp; 1) == 0)
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDSUBPD" form="ymm, ymm, ymm" xed="VADDSUBPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_addsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Alternately add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF ((j &amp; 1) == 0)
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDSUBPS" form="ymm, ymm, ymm" xed="VADDSUBPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDPD" form="ymm, ymm, ymm" xed="VANDPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDPS" form="ymm, ymm, ymm" xed="VANDPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDNPD" form="ymm, ymm, ymm" xed="VANDNPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDNPS" form="ymm, ymm, ymm" xed="VANDNPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_blend_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF imm8[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBLENDPD" form="ymm, ymm, ymm, imm8" xed="VBLENDPD_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_blend_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF imm8[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBLENDPS" form="ymm, ymm, ymm, imm8" xed="VBLENDPS_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_blendv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="mask" etype="MASK"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF mask[i+63]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBLENDVPD" form="ymm, ymm, ymm, ymm" xed="VBLENDVPD_YMMqq_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_blendv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="mask" etype="MASK"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF mask[i+31]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBLENDVPS" form="ymm, ymm, ymm, ymm" xed="VBLENDVPS_YMMqq_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	dst[i+63:i] := a[i+63:i] / b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDIVPD" form="ymm, ymm, ymm" xed="VDIVPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := a[i+31:i] / b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDIVPS" form="ymm, ymm, ymm" xed="VDIVPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_dp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8".</description>
+	<operation>
+DEFINE DP(a[127:0], b[127:0], imm8[7:0]) {
+	FOR j := 0 to 3
+		i := j*32
+		IF imm8[(4+j)%8]
+			temp[i+31:i] := a[i+31:i] * b[i+31:i]
+		ELSE
+			temp[i+31:i] := FP32(0.0)
+		FI
+	ENDFOR
+	
+	sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0])
+	
+	FOR j := 0 to 3
+		i := j*32
+		IF imm8[j%8]
+			tmpdst[i+31:i] := sum[31:0]
+		ELSE
+			tmpdst[i+31:i] := FP32(0.0)
+		FI
+	ENDFOR
+	RETURN tmpdst[127:0]
+}
+dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0])
+dst[255:128] := DP(a[255:128], b[255:128], imm8[7:0])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDPPS" form="ymm, ymm, ymm, imm8" xed="VDPPS_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_hadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[63:0] := a[127:64] + a[63:0]
+dst[127:64] := b[127:64] + b[63:0]
+dst[191:128] := a[255:192] + a[191:128]
+dst[255:192] := b[255:192] + b[191:128]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VHADDPD" form="ymm, ymm, ymm" xed="VHADDPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_hadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] + a[31:0]
+dst[63:32] := a[127:96] + a[95:64]
+dst[95:64] := b[63:32] + b[31:0]
+dst[127:96] := b[127:96] + b[95:64]
+dst[159:128] := a[191:160] + a[159:128]
+dst[191:160] := a[255:224] + a[223:192]
+dst[223:192] := b[191:160] + b[159:128]
+dst[255:224] := b[255:224] + b[223:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VHADDPS" form="ymm, ymm, ymm" xed="VHADDPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_hsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] - a[127:64]
+dst[127:64] := b[63:0] - b[127:64]
+dst[191:128] := a[191:128] - a[255:192]
+dst[255:192] := b[191:128] - b[255:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VHSUBPD" form="ymm, ymm, ymm" xed="VHSUBPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_hsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] - a[63:32]
+dst[63:32] := a[95:64] - a[127:96]
+dst[95:64] := b[31:0] - b[63:32]
+dst[127:96] := b[95:64] - b[127:96]
+dst[159:128] := a[159:128] - a[191:160]
+dst[191:160] := a[223:192] - a[255:224]
+dst[223:192] := b[159:128] - b[191:160]
+dst[255:224] := b[223:192] - b[255:224]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VHSUBPS" form="ymm, ymm, ymm" xed="VHSUBPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMAXPD" form="ymm, ymm, ymm" xed="VMAXPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMAXPS" form="ymm, ymm, ymm" xed="VMAXPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMINPD" form="ymm, ymm, ymm" xed="VMINPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMINPS" form="ymm, ymm, ymm" xed="VMINPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] * b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMULPD" form="ymm, ymm, ymm" xed="VMULPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMULPS" form="ymm, ymm, ymm" xed="VMULPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VORPD" form="ymm, ymm, ymm" xed="VORPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VORPS" form="ymm, ymm, ymm" xed="VORPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
+dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="ymm, ymm, ymm, imm8" xed="VSHUFPD_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+dst[223:192] := SELECT4(b[255:128], imm8[5:4])
+dst[255:224] := SELECT4(b[255:128], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="ymm, ymm, ymm, imm8" xed="VSHUFPS_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSUBPD" form="ymm, ymm, ymm" xed="VSUBPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSUBPS" form="ymm, ymm, ymm" xed="VSUBPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VXORPD" form="ymm, ymm, ymm" xed="VXORPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VXORPS" form="ymm, ymm, ymm" xed="VXORPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_cmp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCMPPD" form="xmm, xmm, xmm, imm8" xed="VCMPPD_XMMdq_XMMdq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cmp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Compare</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ( a[i+63:i] OP b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCMPPD" form="ymm, ymm, ymm, imm8" xed="VCMPPD_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_cmp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCMPPS" form="xmm, xmm, xmm, imm8" xed="VCMPPS_XMMdq_XMMdq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cmp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Compare</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] OP b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCMPPS" form="ymm, ymm, ymm, imm8" xed="VCMPPS_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_cmp_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+dst[63:0] := ( a[63:0] OP b[63:0] ) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCMPSD" form="xmm, xmm, xmm, imm8" xed="VCMPSD_XMMdq_XMMdq_XMMq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_cmp_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+dst[31:0] := ( a[31:0] OP b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCMPSS" form="xmm, xmm, xmm, imm8" xed="VCMPSS_XMMdq_XMMdq_XMMd_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="ymm, xmm" xed="VCVTDQ2PD_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="ymm, ymm" xed="VCVTDQ2PS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="xmm, ymm" xed="VCVTPD2PS_XMMdq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="ymm, ymm" xed="VCVTPS2DQ_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvtps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="ymm, xmm" xed="VCVTPS2PD_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="xmm, ymm" xed="VCVTTPD2DQ_XMMdq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="xmm, ymm" xed="VCVTPD2DQ_XMMdq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="ymm, ymm" xed="VCVTTPS2DQ_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_extractf128_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF128" form="xmm, ymm, imm8" xed="VEXTRACTF128_XMMdq_YMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_extractf128_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF128" form="xmm, ymm, imm8" xed="VEXTRACTF128_XMMdq_YMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_extractf128_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF128" form="xmm, ymm, imm8" xed="VEXTRACTF128_XMMdq_YMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_extract_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__int32" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="3"/>
+	<description>Extract a 32-bit integer from "a", selected with "index", and store the result in "dst".</description>
+	<operation>
+dst[31:0] := (a[255:0] &gt;&gt; (index[2:0] * 32))[31:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_extract_epi64">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="2"/>
+	<description>Extract a 64-bit integer from "a", selected with "index", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[255:0] &gt;&gt; (index[1:0] * 64))[63:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_zeroall">
+	<CPUID>AVX</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Zero the contents of all XMM or YMM registers.</description>
+	<operation>YMM0[MAX:0] := 0
+YMM1[MAX:0] := 0
+YMM2[MAX:0] := 0
+YMM3[MAX:0] := 0
+YMM4[MAX:0] := 0
+YMM5[MAX:0] := 0
+YMM6[MAX:0] := 0
+YMM7[MAX:0] := 0
+IF _64_BIT_MODE
+	YMM8[MAX:0] := 0
+	YMM9[MAX:0] := 0
+	YMM10[MAX:0] := 0
+	YMM11[MAX:0] := 0
+	YMM12[MAX:0] := 0
+	YMM13[MAX:0] := 0
+	YMM14[MAX:0] := 0
+	YMM15[MAX:0] := 0
+FI
+	</operation>
+	<instruction name="VZEROALL" xed="VZEROALL"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_zeroupper">
+	<CPUID>AVX</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Zero the upper 128 bits of all YMM registers; the lower 128 bits of the registers are unmodified.</description>
+	<operation>YMM0[MAX:128] := 0
+YMM1[MAX:128] := 0
+YMM2[MAX:128] := 0
+YMM3[MAX:128] := 0
+YMM4[MAX:128] := 0
+YMM5[MAX:128] := 0
+YMM6[MAX:128] := 0
+YMM7[MAX:128] := 0
+IF _64_BIT_MODE
+	YMM8[MAX:128] := 0
+	YMM9[MAX:128] := 0
+	YMM10[MAX:128] := 0
+	YMM11[MAX:128] := 0
+	YMM12[MAX:128] := 0
+	YMM13[MAX:128] := 0
+	YMM14[MAX:128] := 0
+	YMM15[MAX:128] := 0
+FI
+	</operation>
+	<instruction name="VZEROUPPER" xed="VZEROUPPER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], b[1:0])
+dst[63:32] := SELECT4(a[127:0], b[33:32])
+dst[95:64] := SELECT4(a[127:0], b[65:64])
+dst[127:96] := SELECT4(a[127:0], b[97:96])
+dst[159:128] := SELECT4(a[255:128], b[129:128])
+dst[191:160] := SELECT4(a[255:128], b[161:160])
+dst[223:192] := SELECT4(a[255:128], b[193:192])
+dst[255:224] := SELECT4(a[255:128], b[225:224])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="ymm, ymm, ymm" xed="VPERMILPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], b[1:0])
+dst[63:32] := SELECT4(a[127:0], b[33:32])
+dst[95:64] := SELECT4(a[127:0], b[65:64])
+dst[127:96] := SELECT4(a[127:0], b[97:96])
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="xmm, xmm, xmm" xed="VPERMILPS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="ymm, ymm, imm8" xed="VPERMILPS_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="xmm, xmm, imm8" xed="VPERMILPS_XMMdq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b" (bit 1 of each 64-bit element of "b" selects the source element), and store the results in "dst".</description>
+	<operation>
+IF (b[1] == 0) dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) dst[127:64] := a[127:64]; FI
+IF (b[129] == 0) dst[191:128] := a[191:128]; FI
+IF (b[129] == 1) dst[191:128] := a[255:192]; FI
+IF (b[193] == 0) dst[255:192] := a[191:128]; FI
+IF (b[193] == 1) dst[255:192] := a[255:192]; FI
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="ymm, ymm, ymm" xed="VPERMILPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b" (bit 1 of each 64-bit element of "b" selects the source element), and store the results in "dst".</description>
+	<operation>
+IF (b[1] == 0) dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) dst[127:64] := a[127:64]; FI
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="xmm, xmm, xmm" xed="VPERMILPD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI
+IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI
+IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI
+IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI
+IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="ymm, ymm, imm8" xed="VPERMILPD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="xmm, xmm, imm8" xed="VPERMILPD_XMMdq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permute2f128_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src1, src2, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src1[127:0]
+	1:	tmp[127:0] := src1[255:128]
+	2:	tmp[127:0] := src2[127:0]
+	3:	tmp[127:0] := src2[255:128]
+	ESAC
+	IF control[3]
+		tmp[127:0] := 0
+	FI
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
+dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERM2F128" form="ymm, ymm, ymm, imm8" xed="VPERM2F128_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permute2f128_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src1, src2, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src1[127:0]
+	1:	tmp[127:0] := src1[255:128]
+	2:	tmp[127:0] := src2[127:0]
+	3:	tmp[127:0] := src2[255:128]
+	ESAC
+	IF control[3]
+		tmp[127:0] := 0
+	FI
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
+dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERM2F128" form="ymm, ymm, ymm, imm8" xed="VPERM2F128_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_permute2f128_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128 bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src1, src2, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src1[127:0]
+	1:	tmp[127:0] := src1[255:128]
+	2:	tmp[127:0] := src2[127:0]
+	3:	tmp[127:0] := src2[255:128]
+	ESAC
+	IF control[3]
+		tmp[127:0] := 0
+	FI
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
+dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERM2F128" form="ymm, ymm, ymm, imm8" xed="VPERM2F128_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_broadcast_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float const *" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst".</description>
+	<operation>
+tmp[31:0] := MEM[mem_addr+31:mem_addr]
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := tmp[31:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="ymm, m32" xed="VBROADCASTSS_YMMqq_MEMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_broadcast_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const *" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Broadcast a single-precision (32-bit) floating-point element from memory to all elements of "dst".</description>
+	<operation>
+tmp[31:0] := MEM[mem_addr+31:mem_addr]
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := tmp[31:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="xmm, m32" xed="VBROADCASTSS_XMMdq_MEMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_broadcast_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const *" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Broadcast a double-precision (64-bit) floating-point element from memory to all elements of "dst".</description>
+	<operation>
+tmp[63:0] := MEM[mem_addr+63:mem_addr]
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := tmp[63:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="ymm, m64" xed="VBROADCASTSD_YMMqq_MEMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_broadcast_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128 const *" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Broadcast 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of "dst".</description>
+	<operation>
+tmp[127:0] := MEM[mem_addr+127:mem_addr]
+dst[127:0] := tmp[127:0]
+dst[255:128] := tmp[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF128" form="ymm, m128" xed="VBROADCASTF128_YMMqq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_broadcast_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d const *" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Broadcast 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of "dst".</description>
+	<operation>
+tmp[127:0] := MEM[mem_addr+127:mem_addr]
+dst[127:0] := tmp[127:0]
+dst[255:128] := tmp[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF128" form="ymm, m128" xed="VBROADCASTF128_YMMqq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_insertf128_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_insertf128_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE imm8[0] OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_insertf128_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_insert_epi8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__int8" varname="i" etype="UI8"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="5"/>
+	<description>Copy "a" to "dst", and insert the 8-bit integer "i" into "dst" at the location specified by "index".</description>
+	<operation>
+dst[255:0] := a[255:0]
+sel := index[4:0]*8
+dst[sel+7:sel] := i[7:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_insert_epi16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__int16" varname="i" etype="UI16"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="4"/>
+	<description>Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "index".</description>
+	<operation>
+dst[255:0] := a[255:0]
+sel := index[3:0]*16
+dst[sel+15:sel] := i[15:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_insert_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__int32" varname="i" etype="UI32"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="3"/>
+	<description>Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "index".</description>
+	<operation>
+dst[255:0] := a[255:0]
+sel := index[2:0]*32
+dst[sel+31:sel] := i[31:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_insert_epi64">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__int64" varname="i" etype="UI64"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "index".</description>
+	<operation>
+dst[255:0] := a[255:0]
+sel := index[1:0]*64
+dst[sel+63:sel] := i[63:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="ymm, m256" xed="VMOVAPD_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_store_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVAPD" form="m256, ymm" xed="VMOVAPD_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float const *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="ymm, m256" xed="VMOVAPS_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_store_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVAPS" form="m256, ymm" xed="VMOVAPS_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="ymm, m256" xed="VMOVUPD_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_storeu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVUPD" form="m256, ymm" xed="VMOVUPD_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float const *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="ymm, m256" xed="VMOVUPS_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_storeu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVUPS" form="m256, ymm" xed="VMOVUPS_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_load_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i const *" varname="mem_addr" etype="M256" memwidth="256"/>
+	<description>Load 256 bits of integer data from memory into "dst".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA" form="ymm, m256" xed="VMOVDQA_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_store_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m256i *" varname="mem_addr" etype="M256" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<description>Store 256 bits of integer data from "a" into memory.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQA" form="m256, ymm" xed="VMOVDQA_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_loadu_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i const *" varname="mem_addr" etype="M256" memwidth="256"/>
+	<description>Load 256 bits of integer data from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU" form="ymm, m256" xed="VMOVDQU_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_storeu_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m256i *" varname="mem_addr" etype="M256" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<description>Store 256 bits of integer data from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQU" form="m256, ymm" xed="VMOVDQU_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_maskload_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF mask[i+63]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMASKMOVPD" form="ymm, ymm, m256" xed="VMASKMOVPD_YMMqq_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_maskstore_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask" (elements are not stored when the high bit of the corresponding element is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF mask[i+63]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMASKMOVPD" form="m256, ymm, ymm" xed="VMASKMOVPD_MEMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_maskload_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const *" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element of "mask" is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF mask[i+63]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMASKMOVPD" form="xmm, xmm, m128" xed="VMASKMOVPD_XMMdq_XMMdq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_maskstore_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double *" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using "mask" (elements are not stored when the high bit of the corresponding element of "mask" is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF mask[i+63]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMASKMOVPD" form="m128, xmm, xmm" xed="VMASKMOVPD_MEMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_maskload_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float const *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element of "mask" is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF mask[i+31]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMASKMOVPS" form="ymm, ymm, m256" xed="VMASKMOVPS_YMMqq_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_maskstore_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask" (elements are not stored when the high bit of the corresponding element of "mask" is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF mask[i+31]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMASKMOVPS" form="m256, ymm, ymm" xed="VMASKMOVPS_MEMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_maskload_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const *" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element of "mask" is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF mask[i+31]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMASKMOVPS" form="xmm, xmm, m128" xed="VMASKMOVPS_XMMdq_XMMdq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_maskstore_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float *" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using "mask" (elements are not stored when the high bit of the corresponding element of "mask" is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF mask[i+31]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMASKMOVPS" form="m128, xmm, xmm" xed="VMASKMOVPS_MEMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] 
+dst[63:32] := a[63:32] 
+dst[95:64] := a[127:96] 
+dst[127:96] := a[127:96]
+dst[159:128] := a[191:160] 
+dst[191:160] := a[191:160] 
+dst[223:192] := a[255:224] 
+dst[255:224] := a[255:224]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="ymm, ymm" xed="VMOVSHDUP_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] 
+dst[63:32] := a[31:0] 
+dst[95:64] := a[95:64] 
+dst[127:96] := a[95:64]
+dst[159:128] := a[159:128] 
+dst[191:160] := a[159:128] 
+dst[223:192] := a[223:192] 
+dst[255:224] := a[223:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="ymm, ymm" xed="VMOVSLDUP_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Move</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := a[63:0]
+dst[191:128] := a[191:128]
+dst[255:192] := a[191:128]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="ymm, ymm" xed="VMOVDDUP_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_lddqu_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i const *" varname="mem_addr" etype="M256" memwidth="256"/>
+	<description>Load 256-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm256_loadu_si256" when the data crosses a cache line boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VLDDQU" form="ymm, m256" xed="VLDDQU_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_stream_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m256i *" varname="mem_addr" etype="M256" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<description>Store 256-bits of integer data from "a" into memory using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVNTDQ" form="m256, ymm" xed="VMOVNTDQ_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_stream_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVNTPD" form="m256, ymm" xed="VMOVNTPD_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_stream_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVNTPS" form="m256, ymm" xed="VMOVNTPS_MEMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_rcp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := 1.0 / a[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCPPS" form="ymm, ymm" xed="VRCPPS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_rsqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRSQRTPS" form="ymm, ymm" xed="VRSQRTPS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="ymm, ymm" xed="VSQRTPD_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="ymm, ymm" xed="VSQRTPS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" immtype="_MM_FROUND"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ROUND(a[i+63:i], rounding)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VROUNDPD" form="ymm, ymm, imm8" xed="VROUNDPD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" immtype="_MM_FROUND"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ROUND(a[i+31:i], rounding)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VROUNDPS" form="ymm, ymm, imm8" xed="VROUNDPS_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="ymm, ymm, ymm" xed="VUNPCKHPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="ymm, ymm, ymm" xed="VUNPCKHPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="ymm, ymm, ymm" xed="VUNPCKLPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="ymm, ymm, ymm" xed="VUNPCKLPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testz_si256">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value.</description>
+	<operation>
+IF ((a[255:0] AND b[255:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[255:0]) AND b[255:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+RETURN ZF
+	</operation>
+	<instruction name="VPTEST" form="ymm, ymm" xed="VPTEST_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testc_si256">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value.</description>
+	<operation>
+IF ((a[255:0] AND b[255:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[255:0]) AND b[255:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+RETURN CF
+	</operation>
+	<instruction name="VPTEST" form="ymm, ymm" xed="VPTEST_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testnzc_si256">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+IF ((a[255:0] AND b[255:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[255:0]) AND b[255:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="VPTEST" form="ymm, ymm" xed="VPTEST_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testz_pd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value.</description>
+	<operation>
+tmp[255:0] := a[255:0] AND b[255:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[255] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[255:0] := (NOT a[255:0]) AND b[255:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[255] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := ZF
+	</operation>
+	<instruction name="VTESTPD" form="ymm, ymm" xed="VTESTPD_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testc_pd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value.</description>
+	<operation>
+tmp[255:0] := a[255:0] AND b[255:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[255] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[255:0] := (NOT a[255:0]) AND b[255:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[255] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := CF
+	</operation>
+	<instruction name="VTESTPD" form="ymm, ymm" xed="VTESTPD_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testnzc_pd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+tmp[255:0] := a[255:0] AND b[255:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[255] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[255:0] := (NOT a[255:0]) AND b[255:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[255] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="VTESTPD" form="ymm, ymm" xed="VTESTPD_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_testz_pd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value.</description>
+	<operation>
+tmp[127:0] := a[127:0] AND b[127:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[127:0] := (NOT a[127:0]) AND b[127:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := ZF
+	</operation>
+	<instruction name="VTESTPD" form="xmm, xmm" xed="VTESTPD_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_testc_pd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value.</description>
+	<operation>
+tmp[127:0] := a[127:0] AND b[127:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[127:0] := (NOT a[127:0]) AND b[127:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := CF
+	</operation>
+	<instruction name="VTESTPD" form="xmm, xmm" xed="VTESTPD_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_testnzc_pd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+tmp[127:0] := a[127:0] AND b[127:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[127:0] := (NOT a[127:0]) AND b[127:0]
+IF (tmp[63] == 0 &amp;&amp; tmp[127] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="VTESTPD" form="xmm, xmm" xed="VTESTPD_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testz_ps">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value.</description>
+	<operation>
+tmp[255:0] := a[255:0] AND b[255:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; \
+    tmp[159] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[223] == 0 &amp;&amp; tmp[255] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[255:0] := (NOT a[255:0]) AND b[255:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; \
+    tmp[159] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[223] == 0 &amp;&amp; tmp[255] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := ZF
+	</operation>
+	<instruction name="VTESTPS" form="ymm, ymm" xed="VTESTPS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testc_ps">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value.</description>
+	<operation>
+tmp[255:0] := a[255:0] AND b[255:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; \
+    tmp[159] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[223] == 0 &amp;&amp; tmp[255] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[255:0] := (NOT a[255:0]) AND b[255:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; \
+    tmp[159] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[223] == 0 &amp;&amp; tmp[255] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := CF
+	</operation>
+	<instruction name="VTESTPS" form="ymm, ymm" xed="VTESTPS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_testnzc_ps">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 256-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+tmp[255:0] := a[255:0] AND b[255:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; \
+    tmp[159] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[223] == 0 &amp;&amp; tmp[255] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[255:0] := (NOT a[255:0]) AND b[255:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0 &amp;&amp; \
+    tmp[159] == 0 &amp;&amp; tmp[191] == 0 &amp;&amp; tmp[223] == 0 &amp;&amp; tmp[255] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="VTESTPS" form="ymm, ymm" xed="VTESTPS_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_testz_ps">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "ZF" value.</description>
+	<operation>
+tmp[127:0] := a[127:0] AND b[127:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[127:0] := (NOT a[127:0]) AND b[127:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := ZF
+	</operation>
+	<instruction name="VTESTPS" form="xmm, xmm" xed="VTESTPS_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_testc_ps">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return the "CF" value.</description>
+	<operation>
+tmp[127:0] := a[127:0] AND b[127:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[127:0] := (NOT a[127:0]) AND b[127:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := CF
+	</operation>
+	<instruction name="VTESTPS" form="xmm, xmm" xed="VTESTPS_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm_testnzc_ps">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in "a" and "b", producing an intermediate 128-bit value, and set "ZF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", producing an intermediate value, and set "CF" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+tmp[127:0] := a[127:0] AND b[127:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+tmp[127:0] := (NOT a[127:0]) AND b[127:0]
+IF (tmp[31] == 0 &amp;&amp; tmp[63] == 0 &amp;&amp; tmp[95] == 0 &amp;&amp; tmp[127] == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="VTESTPS" form="xmm, xmm" xed="VTESTPS_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_movemask_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF a[i+63]
+		dst[j] := 1
+	ELSE
+		dst[j] := 0
+	FI
+ENDFOR
+dst[MAX:4] := 0
+	</operation>
+	<instruction name="VMOVMSKPD" form="r32, ymm" xed="VMOVMSKPD_GPR32d_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_movemask_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF a[i+31]
+		dst[j] := 1
+	ELSE
+		dst[j] := 0
+	FI
+ENDFOR
+dst[MAX:8] := 0
+	</operation>
+	<instruction name="VMOVMSKPS" form="r32, ymm" xed="VMOVMSKPS_GPR32d_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_setzero_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m256d with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VXORPD" form="ymm, ymm, ymm" xed="VXORPD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_setzero_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m256 with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VXORPS" form="ymm, ymm, ymm" xed="VXORPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_setzero_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m256i with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VPXOR" form="ymm, ymm, ymm" xed="VPXOR_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="e3" etype="FP64"/>
+	<parameter type="double" varname="e2" etype="FP64"/>
+	<parameter type="double" varname="e1" etype="FP64"/>
+	<parameter type="double" varname="e0" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+dst[191:128] := e2
+dst[255:192] := e3
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="e7" etype="FP32"/>
+	<parameter type="float" varname="e6" etype="FP32"/>
+	<parameter type="float" varname="e5" etype="FP32"/>
+	<parameter type="float" varname="e4" etype="FP32"/>
+	<parameter type="float" varname="e3" etype="FP32"/>
+	<parameter type="float" varname="e2" etype="FP32"/>
+	<parameter type="float" varname="e1" etype="FP32"/>
+	<parameter type="float" varname="e0" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+dst[95:64] := e2
+dst[127:96] := e3
+dst[159:128] := e4
+dst[191:160] := e5
+dst[223:192] := e6
+dst[255:224] := e7
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set_epi8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="e31" etype="UI8"/>
+	<parameter type="char" varname="e30" etype="UI8"/>
+	<parameter type="char" varname="e29" etype="UI8"/>
+	<parameter type="char" varname="e28" etype="UI8"/>
+	<parameter type="char" varname="e27" etype="UI8"/>
+	<parameter type="char" varname="e26" etype="UI8"/>
+	<parameter type="char" varname="e25" etype="UI8"/>
+	<parameter type="char" varname="e24" etype="UI8"/>
+	<parameter type="char" varname="e23" etype="UI8"/>
+	<parameter type="char" varname="e22" etype="UI8"/>
+	<parameter type="char" varname="e21" etype="UI8"/>
+	<parameter type="char" varname="e20" etype="UI8"/>
+	<parameter type="char" varname="e19" etype="UI8"/>
+	<parameter type="char" varname="e18" etype="UI8"/>
+	<parameter type="char" varname="e17" etype="UI8"/>
+	<parameter type="char" varname="e16" etype="UI8"/>
+	<parameter type="char" varname="e15" etype="UI8"/>
+	<parameter type="char" varname="e14" etype="UI8"/>
+	<parameter type="char" varname="e13" etype="UI8"/>
+	<parameter type="char" varname="e12" etype="UI8"/>
+	<parameter type="char" varname="e11" etype="UI8"/>
+	<parameter type="char" varname="e10" etype="UI8"/>
+	<parameter type="char" varname="e9" etype="UI8"/>
+	<parameter type="char" varname="e8" etype="UI8"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[7:0] := e0
+dst[15:8] := e1
+dst[23:16] := e2
+dst[31:24] := e3
+dst[39:32] := e4
+dst[47:40] := e5
+dst[55:48] := e6
+dst[63:56] := e7
+dst[71:64] := e8
+dst[79:72] := e9
+dst[87:80] := e10
+dst[95:88] := e11
+dst[103:96] := e12
+dst[111:104] := e13
+dst[119:112] := e14
+dst[127:120] := e15
+dst[135:128] := e16
+dst[143:136] := e17
+dst[151:144] := e18
+dst[159:152] := e19
+dst[167:160] := e20
+dst[175:168] := e21
+dst[183:176] := e22
+dst[191:184] := e23
+dst[199:192] := e24
+dst[207:200] := e25
+dst[215:208] := e26
+dst[223:216] := e27
+dst[231:224] := e28
+dst[239:232] := e29
+dst[247:240] := e30
+dst[255:248] := e31
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set_epi16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="e15" etype="UI16"/>
+	<parameter type="short" varname="e14" etype="UI16"/>
+	<parameter type="short" varname="e13" etype="UI16"/>
+	<parameter type="short" varname="e12" etype="UI16"/>
+	<parameter type="short" varname="e11" etype="UI16"/>
+	<parameter type="short" varname="e10" etype="UI16"/>
+	<parameter type="short" varname="e9" etype="UI16"/>
+	<parameter type="short" varname="e8" etype="UI16"/>
+	<parameter type="short" varname="e7" etype="UI16"/>
+	<parameter type="short" varname="e6" etype="UI16"/>
+	<parameter type="short" varname="e5" etype="UI16"/>
+	<parameter type="short" varname="e4" etype="UI16"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[15:0] := e0
+dst[31:16] := e1
+dst[47:32] := e2
+dst[63:48] := e3
+dst[79:64] := e4
+dst[95:80] := e5
+dst[111:96] := e6
+dst[127:112] := e7
+dst[143:128] := e8
+dst[159:144] := e9
+dst[175:160] := e10
+dst[191:176] := e11
+dst[207:192] := e12
+dst[223:208] := e13
+dst[239:224] := e14
+dst[255:240] := e15
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="e7" etype="UI32"/>
+	<parameter type="int" varname="e6" etype="UI32"/>
+	<parameter type="int" varname="e5" etype="UI32"/>
+	<parameter type="int" varname="e4" etype="UI32"/>
+	<parameter type="int" varname="e3" etype="UI32"/>
+	<parameter type="int" varname="e2" etype="UI32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+dst[95:64] := e2
+dst[127:96] := e3
+dst[159:128] := e4
+dst[191:160] := e5
+dst[223:192] := e6
+dst[255:224] := e7
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set_epi64x">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="e3" etype="UI64"/>
+	<parameter type="__int64" varname="e2" etype="UI64"/>
+	<parameter type="__int64" varname="e1" etype="UI64"/>
+	<parameter type="__int64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+dst[191:128] := e2
+dst[255:192] := e3
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_setr_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="e3" etype="FP64"/>
+	<parameter type="double" varname="e2" etype="FP64"/>
+	<parameter type="double" varname="e1" etype="FP64"/>
+	<parameter type="double" varname="e0" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[63:0] := e3
+dst[127:64] := e2
+dst[191:128] := e1
+dst[255:192] := e0
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_setr_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="e7" etype="FP32"/>
+	<parameter type="float" varname="e6" etype="FP32"/>
+	<parameter type="float" varname="e5" etype="FP32"/>
+	<parameter type="float" varname="e4" etype="FP32"/>
+	<parameter type="float" varname="e3" etype="FP32"/>
+	<parameter type="float" varname="e2" etype="FP32"/>
+	<parameter type="float" varname="e1" etype="FP32"/>
+	<parameter type="float" varname="e0" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e7
+dst[63:32] := e6
+dst[95:64] := e5
+dst[127:96] := e4
+dst[159:128] := e3
+dst[191:160] := e2
+dst[223:192] := e1
+dst[255:224] := e0
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_setr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="e31" etype="UI8"/>
+	<parameter type="char" varname="e30" etype="UI8"/>
+	<parameter type="char" varname="e29" etype="UI8"/>
+	<parameter type="char" varname="e28" etype="UI8"/>
+	<parameter type="char" varname="e27" etype="UI8"/>
+	<parameter type="char" varname="e26" etype="UI8"/>
+	<parameter type="char" varname="e25" etype="UI8"/>
+	<parameter type="char" varname="e24" etype="UI8"/>
+	<parameter type="char" varname="e23" etype="UI8"/>
+	<parameter type="char" varname="e22" etype="UI8"/>
+	<parameter type="char" varname="e21" etype="UI8"/>
+	<parameter type="char" varname="e20" etype="UI8"/>
+	<parameter type="char" varname="e19" etype="UI8"/>
+	<parameter type="char" varname="e18" etype="UI8"/>
+	<parameter type="char" varname="e17" etype="UI8"/>
+	<parameter type="char" varname="e16" etype="UI8"/>
+	<parameter type="char" varname="e15" etype="UI8"/>
+	<parameter type="char" varname="e14" etype="UI8"/>
+	<parameter type="char" varname="e13" etype="UI8"/>
+	<parameter type="char" varname="e12" etype="UI8"/>
+	<parameter type="char" varname="e11" etype="UI8"/>
+	<parameter type="char" varname="e10" etype="UI8"/>
+	<parameter type="char" varname="e9" etype="UI8"/>
+	<parameter type="char" varname="e8" etype="UI8"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[7:0] := e31
+dst[15:8] := e30
+dst[23:16] := e29
+dst[31:24] := e28
+dst[39:32] := e27
+dst[47:40] := e26
+dst[55:48] := e25
+dst[63:56] := e24
+dst[71:64] := e23
+dst[79:72] := e22
+dst[87:80] := e21
+dst[95:88] := e20
+dst[103:96] := e19
+dst[111:104] := e18
+dst[119:112] := e17
+dst[127:120] := e16
+dst[135:128] := e15
+dst[143:136] := e14
+dst[151:144] := e13
+dst[159:152] := e12
+dst[167:160] := e11
+dst[175:168] := e10
+dst[183:176] := e9
+dst[191:184] := e8
+dst[199:192] := e7
+dst[207:200] := e6
+dst[215:208] := e5
+dst[223:216] := e4
+dst[231:224] := e3
+dst[239:232] := e2
+dst[247:240] := e1
+dst[255:248] := e0
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_setr_epi16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="e15" etype="UI16"/>
+	<parameter type="short" varname="e14" etype="UI16"/>
+	<parameter type="short" varname="e13" etype="UI16"/>
+	<parameter type="short" varname="e12" etype="UI16"/>
+	<parameter type="short" varname="e11" etype="UI16"/>
+	<parameter type="short" varname="e10" etype="UI16"/>
+	<parameter type="short" varname="e9" etype="UI16"/>
+	<parameter type="short" varname="e8" etype="UI16"/>
+	<parameter type="short" varname="e7" etype="UI16"/>
+	<parameter type="short" varname="e6" etype="UI16"/>
+	<parameter type="short" varname="e5" etype="UI16"/>
+	<parameter type="short" varname="e4" etype="UI16"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[15:0] := e15
+dst[31:16] := e14
+dst[47:32] := e13
+dst[63:48] := e12
+dst[79:64] := e11
+dst[95:80] := e10
+dst[111:96] := e9
+dst[127:112] := e8
+dst[143:128] := e7
+dst[159:144] := e6
+dst[175:160] := e5
+dst[191:176] := e4
+dst[207:192] := e3
+dst[223:208] := e2
+dst[239:224] := e1
+dst[255:240] := e0
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_setr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="e7" etype="UI32"/>
+	<parameter type="int" varname="e6" etype="UI32"/>
+	<parameter type="int" varname="e5" etype="UI32"/>
+	<parameter type="int" varname="e4" etype="UI32"/>
+	<parameter type="int" varname="e3" etype="UI32"/>
+	<parameter type="int" varname="e2" etype="UI32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e7
+dst[63:32] := e6
+dst[95:64] := e5
+dst[127:96] := e4
+dst[159:128] := e3
+dst[191:160] := e2
+dst[223:192] := e1
+dst[255:224] := e0
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_setr_epi64x">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="e3" etype="UI64"/>
+	<parameter type="__int64" varname="e2" etype="UI64"/>
+	<parameter type="__int64" varname="e1" etype="UI64"/>
+	<parameter type="__int64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[63:0] := e3
+dst[127:64] := e2
+dst[191:128] := e1
+dst[255:192] := e0
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set1_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set1_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastb" instruction.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastw" instruction.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastd" instruction.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_set1_epi64x">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="long long" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate the "vpbroadcastq" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m256d to type __m256.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m256 to type __m256d.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castps_si256">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m256 to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castpd_si256">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m256d to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castsi256_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Cast vector of type __m256i to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castsi256_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Cast vector of type __m256i to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castps256_ps128">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m256 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castpd256_pd128">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m256d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castsi256_si128">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<description>Cast vector of type __m256i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castps128_ps256">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m128 to type __m256; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castpd128_pd256">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_castsi128_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m128i" varname="a" etype="M256"/>
+	<description>Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_zextps128_ps256">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m128 to type __m256; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_zextpd128_pd256">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_zextsi128_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Cast</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m128i" varname="a" etype="M256"/>
+	<description>Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_floor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := FLOOR(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VROUNDPS" form="ymm, ymm, imm8" xed="VROUNDPS_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_ceil_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := CEIL(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VROUNDPS" form="ymm, ymm, imm8" xed="VROUNDPS_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_floor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := FLOOR(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VROUNDPD" form="ymm, ymm, imm8" xed="VROUNDPD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_ceil_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CEIL(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VROUNDPD" form="ymm, ymm, imm8" xed="VROUNDPD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_undefined_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>General Support</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m256 with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_undefined_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>General Support</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m256d with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_undefined_si256">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>General Support</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m256i with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_set_m128">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="hi" etype="FP32"/>
+	<parameter type="__m128" varname="lo" etype="FP32"/>
+	<description>Set packed __m256 vector "dst" with the supplied values.</description>
+	<operation>
+dst[127:0] := lo[127:0]
+dst[255:128] := hi[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_set_m128d">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="hi" etype="FP64"/>
+	<parameter type="__m128d" varname="lo" etype="FP64"/>
+	<description>Set packed __m256d vector "dst" with the supplied values.</description>
+	<operation>
+dst[127:0] := lo[127:0]
+dst[255:128] := hi[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_set_m128i">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="hi" etype="M128"/>
+	<parameter type="__m128i" varname="lo" etype="M128"/>
+	<description>Set packed __m256i vector "dst" with the supplied values.</description>
+	<operation>
+dst[127:0] := lo[127:0]
+dst[255:128] := hi[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_setr_m128">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="lo" etype="FP32"/>
+	<parameter type="__m128" varname="hi" etype="FP32"/>
+	<description>Set packed __m256 vector "dst" with the supplied values.</description>
+	<operation>
+dst[127:0] := lo[127:0]
+dst[255:128] := hi[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_setr_m128d">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="lo" etype="FP64"/>
+	<parameter type="__m128d" varname="hi" etype="FP64"/>
+	<description>Set packed __m256d vector "dst" with the supplied values.</description>
+	<operation>
+dst[127:0] := lo[127:0]
+dst[255:128] := hi[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" name="_mm256_setr_m128i">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="lo" etype="M128"/>
+	<parameter type="__m128i" varname="hi" etype="M128"/>
+	<description>Set packed __m256i vector "dst" with the supplied values.</description>
+	<operation>
+dst[127:0] := lo[127:0]
+dst[255:128] := hi[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF128" form="ymm, ymm, xmm, imm8" xed="VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_loadu2_m128">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="hiaddr" etype="FP32" memwidth="128"/>
+	<parameter type="float const*" varname="loaddr" etype="FP32" memwidth="128"/>
+	<description>Load two 128-bit values (composed of 4 packed single-precision (32-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst".
+	"hiaddr" and "loaddr" do not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[loaddr+127:loaddr]
+dst[255:128] := MEM[hiaddr+127:hiaddr]
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_loadu2_m128d">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="hiaddr" etype="FP64" memwidth="128"/>
+	<parameter type="double const*" varname="loaddr" etype="FP64" memwidth="128"/>
+	<description>Load two 128-bit values (composed of 2 packed double-precision (64-bit) floating-point elements) from memory, and combine them into a 256-bit value in "dst".
+	"hiaddr" and "loaddr" do not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[loaddr+127:loaddr]
+dst[255:128] := MEM[hiaddr+127:hiaddr]
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_loadu2_m128i">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m128i const*" varname="hiaddr" etype="M128" memwidth="128"/>
+	<parameter type="__m128i const*" varname="loaddr" etype="M128" memwidth="128"/>
+	<description>Load two 128-bit values (composed of integer data) from memory, and combine them into a 256-bit value in "dst".
+	"hiaddr" and "loaddr" do not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[loaddr+127:loaddr]
+dst[255:128] := MEM[hiaddr+127:hiaddr]
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_storeu2_m128">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="hiaddr" etype="FP32" memwidth="128"/>
+	<parameter type="float*" varname="loaddr" etype="FP32" memwidth="128"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store the high and low 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into two different 128-bit memory locations.
+	"hiaddr" and "loaddr" do not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[loaddr+127:loaddr] := a[127:0]
+MEM[hiaddr+127:hiaddr] := a[255:128]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_storeu2_m128d">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="hiaddr" etype="FP64" memwidth="128"/>
+	<parameter type="double*" varname="loaddr" etype="FP64" memwidth="128"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store the high and low 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into two different 128-bit memory locations.
+	"hiaddr" and "loaddr" do not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[loaddr+127:loaddr] := a[127:0]
+MEM[hiaddr+127:hiaddr] := a[255:128]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" sequence="TRUE" name="_mm256_storeu2_m128i">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m128i*" varname="hiaddr" etype="M128" memwidth="128"/>
+	<parameter type="__m128i*" varname="loaddr" etype="M128" memwidth="128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<description>Store the high and low 128-bit halves (each composed of integer data) from "a" into two different 128-bit memory locations.
+	"hiaddr" and "loaddr" do not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[loaddr+127:loaddr] := a[127:0]
+MEM[hiaddr+127:hiaddr] := a[255:128]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_acos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ACOS(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_acos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ACOS(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_acosh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ACOSH(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_acosh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ACOSH(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_asin_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ASIN(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_asin_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ASIN(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_asinh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ASINH(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_asinh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ASINH(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_atan_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ATAN(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_atan_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ATAN(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_atan2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_atan2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_atanh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ATANH(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_atanh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ATANH(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cbrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CubeRoot(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cbrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := CubeRoot(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cdfnorm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CDFNormal(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cdfnorm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := CDFNormal(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cdfnorminv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := InverseCDFNormal(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cdfnorminv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := InverseCDFNormal(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]".</description>
+	<operation>
+DEFINE CEXP(a[31:0], b[31:0]) {
+	result[31:0]  := POW(FP32(e), a[31:0]) * COS(b[31:0])
+	result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0])
+	RETURN result
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_clog_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]".</description>
+	<operation>
+DEFINE CLOG(a[31:0], b[31:0]) {
+	result[31:0]  := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0)))
+	result[63:32] := ATAN2(b, a)
+	RETURN result
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := COS(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := COS(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cosd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := COSD(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cosd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := COSD(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cosh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := COSH(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_cosh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := COSH(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_csqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]".</description>
+	<operation>
+DEFINE CSQRT(a[31:0], b[31:0]) {
+	sign[31:0] := (b &lt; 0.0) ? -FP32(1.0) : FP32(1.0)
+	result[31:0]  := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0)
+	result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0)
+	RETURN result
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epi8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 8*j
+	IF b[i+7:i] == 0
+		#DE
+	FI
+	dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epi16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	IF b[i+15:i] == 0
+		#DE
+	FI
+	dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF b[i+31:i] == 0
+		#DE
+	FI
+	dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epi64">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	IF b[i+63:i] == 0
+		#DE
+	FI
+	dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epu8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 8*j
+	IF b[i+7:i] == 0
+		#DE
+	FI
+	dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epu16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	IF b[i+15:i] == 0
+		#DE
+	FI
+	dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epu32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF b[i+31:i] == 0
+		#DE
+	FI
+	dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_div_epu64">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	IF b[i+63:i] == 0
+		#DE
+	FI
+	dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erf_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ERF(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erf_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ERF(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erfc_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := 1.0 - ERF(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erfc_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := 1.0 - ERF(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erfcinv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i]))
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erfcinv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i]))
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erfinv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := 1.0 / ERF(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_erfinv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := 1.0 / ERF(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_exp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := POW(e, a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_exp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := POW(FP32(e), a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_exp10_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := POW(10.0, a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_exp10_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := POW(FP32(10.0), a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_exp2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := POW(2.0, a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_exp2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_expm1_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := POW(e, a[i+63:i]) - 1.0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_expm1_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_hypot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_hypot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0))
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_idiv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_idivrem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i *" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_invcbrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := InvCubeRoot(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_invcbrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := InvCubeRoot(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_invsqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := InvSQRT(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_invsqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := InvSQRT(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_irem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log10_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log10_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log1p_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := LOG(1.0 + a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log1p_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := LOG(1.0 + a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_log2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_logb_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_logb_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_pow_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := POW(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_pow_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := POW(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epi8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 31
+	i := 8*j
+	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epi16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := 16*j
+	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epi64">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 64*j
+	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epu8">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 31
+	i := 8*j
+	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epu16">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := 16*j
+	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epu32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_rem_epu64">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 64*j
+	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sin_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SIN(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sin_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SIN(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sincos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d *" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SIN(a[i+63:i])
+	MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sincos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256 *" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SIN(a[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sind_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SIND(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sind_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SIND(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sinh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SINH(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_sinh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SINH(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_ceil_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := CEIL(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_ceil_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := CEIL(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_floor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := FLOOR(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_floor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := FLOOR(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ROUND(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ROUND(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm256_sqrt_pd".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_svml_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm256_sqrt_ps".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_tan_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := TAN(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_tan_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := TAN(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_tand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := TAND(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_tand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := TAND(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_tanh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := TANH(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_tanh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := TANH(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_trunc_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := TRUNCATE(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_trunc_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := TRUNCATE(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_udiv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_udivrem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i *" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm256_urem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" vexEq="TRUE" name="_mm256_cvtss_f32">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Copy the lower single-precision (32-bit) floating-point element of "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="VMOVSS" form="m32, xmm" xed="VMOVSS_MEMd_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" vexEq="TRUE" name="_mm256_cvtsd_f64">
+	<type>Floating Point</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Copy the lower double-precision (64-bit) floating-point element of "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="VMOVSD" form="m64, xmm" xed="VMOVSD_MEMq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX" vexEq="TRUE" name="_mm256_cvtsi256_si32">
+	<type>Integer</type>
+	<CPUID>AVX</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Copy the lower 32-bit integer in "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="VMOVD" form="r32, xmm" xed="VMOVD_GPR32d_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" sequence="TRUE" name="_mm256_extract_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="5"/>
+	<description>Extract an 8-bit integer from "a", selected with "index", and store the result in "dst".</description>
+	<operation>
+dst[7:0] := (a[255:0] &gt;&gt; (index[4:0] * 8))[7:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" sequence="TRUE" name="_mm256_extract_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="const int" varname="index" etype="IMM" immwidth="4"/>
+	<description>Extract a 16-bit integer from "a", selected with "index", and store the result in "dst".</description>
+	<operation>
+dst[15:0] := (a[255:0] &gt;&gt; (index[3:0] * 16))[15:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := ABS(a[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSB" form="ymm, ymm" xed="VPABSB_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := ABS(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSW" form="ymm, ymm" xed="VPABSW_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ABS(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSD" form="ymm, ymm" xed="VPABSD_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := a[i+7:i] + b[i+7:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDB" form="ymm, ymm, ymm" xed="VPADDB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := a[i+15:i] + b[i+15:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDW" form="ymm, ymm, ymm" xed="VPADDW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDD" form="ymm, ymm, ymm" xed="VPADDD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDQ" form="ymm, ymm, ymm" xed="VPADDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDSB" form="ymm, ymm, ymm" xed="VPADDSB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDSW" form="ymm, ymm, ymm" xed="VPADDSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="ymm, ymm, ymm" xed="VPADDUSB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="ymm, ymm, ymm" xed="VPADDUSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*128
+	tmp[255:0] := ((a[i+127:i] &lt;&lt; 128)[255:0] OR b[i+127:i]) &gt;&gt; (imm8*8)
+	dst[i+127:i] := tmp[127:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="ymm, ymm, ymm, imm8" xed="VPALIGNR_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_and_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[255:0] := (a[255:0] AND b[255:0])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAND" form="ymm, ymm, ymm" xed="VPAND_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_andnot_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise NOT of 256 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst".</description>
+	<operation>
+dst[255:0] := ((NOT a[255:0]) AND b[255:0])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDN" form="ymm, ymm, ymm" xed="VPANDN_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAVGB" form="ymm, ymm, ymm" xed="VPAVGB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAVGW" form="ymm, ymm, ymm" xed="VPAVGW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_blend_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Blend packed 16-bit integers from "a" and "b" within 128-bit lanes using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF imm8[j%8]
+		dst[i+15:i] := b[i+15:i]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDW" form="ymm, ymm, ymm, imm8" xed="VPBLENDW_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_blend_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF imm8[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBLENDD" form="xmm, xmm, xmm, imm8" xed="VPBLENDD_XMMdq_XMMdq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_blend_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF imm8[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDD" form="ymm, ymm, ymm, imm8" xed="VPBLENDD_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_blendv_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<description>Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF mask[i+7]
+		dst[i+7:i] := b[i+7:i]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDVB" form="ymm, ymm, ymm, ymm" xed="VPBLENDVB_YMMqq_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="xmm, xmm" xed="VPBROADCASTB_XMMdq_XMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="ymm, xmm" xed="VPBROADCASTB_YMMqq_XMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="xmm, xmm" xed="VPBROADCASTD_XMMdq_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="ymm, xmm" xed="VPBROADCASTD_YMMqq_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="xmm, xmm" xed="VPBROADCASTQ_XMMdq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="ymm, xmm" xed="VPBROADCASTQ_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" vexEq="TRUE" name="_mm_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="MOVDDUP" form="xmm, xmm" xed="MOVDDUP_XMMdq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="ymm, xmm" xed="VBROADCASTSD_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_broadcastsi128_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst".</description>
+	<operation>
+dst[127:0] := a[127:0]
+dst[255:128] := a[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI128" form="ymm, m128" xed="VBROADCASTI128_YMMqq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastsi128_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Broadcast 128 bits of integer data from "a" to all 128-bit lanes in "dst".</description>
+	<operation>
+dst[127:0] := a[127:0]
+dst[255:128] := a[127:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI128" form="ymm, m128" xed="VBROADCASTI128_YMMqq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="xmm, xmm" xed="VBROADCASTSS_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="ymm, xmm" xed="VBROADCASTSS_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="xmm, xmm" xed="VPBROADCASTW_XMMdq_XMMw"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="ymm, xmm" xed="VPBROADCASTW_YMMqq_XMMw"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpeq_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPEQB" form="ymm, ymm, ymm" xed="VPCMPEQB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpeq_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPEQW" form="ymm, ymm, ymm" xed="VPCMPEQW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpeq_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPEQD" form="ymm, ymm, ymm" xed="VPCMPEQD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpeq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPEQQ" form="ymm, ymm, ymm" xed="VPCMPEQQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpgt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] &gt; b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPGTB" form="ymm, ymm, ymm" xed="VPCMPGTB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpgt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] &gt; b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPGTW" form="ymm, ymm, ymm" xed="VPCMPGTW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpgt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &gt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPGTD" form="ymm, ymm, ymm" xed="VPCMPGTD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cmpgt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ( a[i+63:i] &gt; b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCMPGTQ" form="ymm, ymm, ymm" xed="VPCMPGTQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j:= 0 to 7
+	i := 32*j
+	k := 16*j
+	dst[i+31:i] := SignExtend32(a[k+15:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="ymm, xmm" xed="VPMOVSXWD_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j:= 0 to 3
+	i := 64*j
+	k := 16*j
+	dst[i+63:i] := SignExtend64(a[k+15:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="ymm, xmm" xed="VPMOVSXWQ_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j:= 0 to 3
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := SignExtend64(a[k+31:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="ymm, xmm" xed="VPMOVSXDQ_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	l := j*16
+	dst[l+15:l] := SignExtend16(a[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="ymm, xmm" xed="VPMOVSXBW_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 8*j
+	dst[i+31:i] := SignExtend32(a[k+7:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="ymm, xmm" xed="VPMOVSXBD_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := SignExtend64(a[k+7:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="ymm, xmm" xed="VPMOVSXBQ_YMMqq_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 16*j
+	dst[i+31:i] := ZeroExtend32(a[k+15:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="ymm, xmm" xed="VPMOVZXWD_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j:= 0 to 3
+	i := 64*j
+	k := 16*j
+	dst[i+63:i] := ZeroExtend64(a[k+15:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="ymm, xmm" xed="VPMOVZXWQ_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j:= 0 to 3
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := ZeroExtend64(a[k+31:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="ymm, xmm" xed="VPMOVZXDQ_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	l := j*16
+	dst[l+15:l] := ZeroExtend16(a[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="ymm, xmm" xed="VPMOVZXBW_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 8*j
+	dst[i+31:i] := ZeroExtend32(a[k+7:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="ymm, xmm" xed="VPMOVZXBD_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := ZeroExtend64(a[k+7:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="ymm, xmm" xed="VPMOVZXBQ_YMMqq_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_extracti128_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI128" form="xmm, ymm, imm8" xed="VEXTRACTI128_XMMdq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_hadd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := a[31:16] + a[15:0]
+dst[31:16] := a[63:48] + a[47:32]
+dst[47:32] := a[95:80] + a[79:64]
+dst[63:48] := a[127:112] + a[111:96]
+dst[79:64] := b[31:16] + b[15:0]
+dst[95:80] := b[63:48] + b[47:32]
+dst[111:96] := b[95:80] + b[79:64]
+dst[127:112] := b[127:112] + b[111:96]
+dst[143:128] := a[159:144] + a[143:128]
+dst[159:144] := a[191:176] + a[175:160]
+dst[175:160] := a[223:208] + a[207:192]
+dst[191:176] := a[255:240] + a[239:224]
+dst[207:192] := b[159:144] + b[143:128]
+dst[223:208] := b[191:176] + b[175:160]
+dst[239:224] := b[223:208] + b[207:192]
+dst[255:240] := b[255:240] + b[239:224]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPHADDW" form="ymm, ymm, ymm" xed="VPHADDW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_hadd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] + a[31:0]
+dst[63:32] := a[127:96] + a[95:64]
+dst[95:64] := b[63:32] + b[31:0]
+dst[127:96] := b[127:96] + b[95:64]
+dst[159:128] := a[191:160] + a[159:128]
+dst[191:160] := a[255:224] + a[223:192]
+dst[223:192] := b[191:160] + b[159:128]
+dst[255:224] := b[255:224] + b[223:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPHADDD" form="ymm, ymm, ymm" xed="VPHADDD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_hadds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:16] + a[15:0])
+dst[31:16] := Saturate16(a[63:48] + a[47:32])
+dst[47:32] := Saturate16(a[95:80] + a[79:64])
+dst[63:48] := Saturate16(a[127:112] + a[111:96])
+dst[79:64] := Saturate16(b[31:16] + b[15:0])
+dst[95:80] := Saturate16(b[63:48] + b[47:32])
+dst[111:96] := Saturate16(b[95:80] + b[79:64])
+dst[127:112] := Saturate16(b[127:112] + b[111:96])
+dst[143:128] := Saturate16(a[159:144] + a[143:128])
+dst[159:144] := Saturate16(a[191:176] + a[175:160])
+dst[175:160] := Saturate16(a[223:208] + a[207:192])
+dst[191:176] := Saturate16(a[255:240] + a[239:224])
+dst[207:192] := Saturate16(b[159:144] + b[143:128])
+dst[223:208] := Saturate16(b[191:176] + b[175:160])
+dst[239:224] := Saturate16(b[223:208] + b[207:192])
+dst[255:240] := Saturate16(b[255:240] + b[239:224])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPHADDSW" form="ymm, ymm, ymm" xed="VPHADDSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_hsub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := a[15:0] - a[31:16]
+dst[31:16] := a[47:32] - a[63:48]
+dst[47:32] := a[79:64] - a[95:80]
+dst[63:48] := a[111:96] - a[127:112]
+dst[79:64] := b[15:0] - b[31:16]
+dst[95:80] := b[47:32] - b[63:48]
+dst[111:96] := b[79:64] - b[95:80]
+dst[127:112] := b[111:96] - b[127:112]
+dst[143:128] := a[143:128] - a[159:144]
+dst[159:144] := a[175:160] - a[191:176]
+dst[175:160] := a[207:192] - a[223:208]
+dst[191:176] := a[239:224] - a[255:240]
+dst[207:192] := b[143:128] - b[159:144]
+dst[223:208] := b[175:160] - b[191:176]
+dst[239:224] := b[207:192] - b[223:208]
+dst[255:240] := b[239:224] - b[255:240]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPHSUBW" form="ymm, ymm, ymm" xed="VPHSUBW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_hsub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] - a[63:32]
+dst[63:32] := a[95:64] - a[127:96]
+dst[95:64] := b[31:0] - b[63:32]
+dst[127:96] := b[95:64] - b[127:96]
+dst[159:128] := a[159:128] - a[191:160]
+dst[191:160] := a[223:192] - a[255:224]
+dst[223:192] := b[159:128] - b[191:160]
+dst[255:224] := b[223:192] - b[255:224]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPHSUBD" form="ymm, ymm, ymm" xed="VPHSUBD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_hsubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[15:0] - a[31:16])
+dst[31:16] := Saturate16(a[47:32] - a[63:48])
+dst[47:32] := Saturate16(a[79:64] - a[95:80])
+dst[63:48] := Saturate16(a[111:96] - a[127:112])
+dst[79:64] := Saturate16(b[15:0] - b[31:16])
+dst[95:80] := Saturate16(b[47:32] - b[63:48])
+dst[111:96] := Saturate16(b[79:64] - b[95:80])
+dst[127:112] := Saturate16(b[111:96] - b[127:112])
+dst[143:128] := Saturate16(a[143:128] - a[159:144])
+dst[159:144] := Saturate16(a[175:160] - a[191:176])
+dst[175:160] := Saturate16(a[207:192] - a[223:208])
+dst[191:176] := Saturate16(a[239:224] - a[255:240])
+dst[207:192] := Saturate16(b[143:128] - b[159:144])
+dst[223:208] := Saturate16(b[175:160] - b[191:176])
+dst[239:224] := Saturate16(b[207:192] - b[223:208])
+dst[255:240] := Saturate16(b[239:224] - b[255:240])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPHSUBSW" form="ymm, ymm, ymm" xed="VPHSUBSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="xmm, vm32x, xmm" xed="VGATHERDPD_XMMf64_MEMf64_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="ymm, vm32x, ymm" xed="VGATHERDPD_YMMf64_MEMf64_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="xmm, vm32x, xmm" xed="VGATHERDPS_XMMf32_MEMf32_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="ymm, vm32x, ymm" xed="VGATHERDPS_YMMf32_MEMf32_YMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="xmm, vm32x, xmm" xed="VPGATHERDD_XMMu32_MEMd_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="ymm, vm32x, ymm" xed="VPGATHERDD_YMMu32_MEMd_YMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="xmm, vm32x, xmm" xed="VPGATHERDQ_XMMu64_MEMq_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="ymm, vm32x, ymm" xed="VPGATHERDQ_YMMu64_MEMq_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="xmm, vm64x, xmm" xed="VGATHERQPD_XMMf64_MEMf64_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="ymm, vm64x, ymm" xed="VGATHERQPD_YMMf64_MEMf64_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="xmm, vm64x, xmm" xed="VGATHERQPS_XMMf32_MEMf32_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="xmm, vm64y, xmm" xed="VGATHERQPS_XMMf32_MEMf32_XMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="xmm, vm64x, xmm" xed="VPGATHERQD_XMMu32_MEMd_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="xmm, vm64y, xmm" xed="VPGATHERQD_XMMu32_MEMd_XMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="xmm, vm64x, xmm" xed="VPGATHERQQ_XMMu64_MEMq_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="ymm, vm64x, ymm" xed="VPGATHERQQ_YMMu64_MEMq_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_inserti128_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of integer data) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI128" form="ymm, ymm, xmm, imm8" xed="VINSERTI128_YMMqq_YMMqq_XMMdq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="ymm, ymm, ymm" xed="VPMADDWD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="ymm, ymm, ymm" xed="VPMADDUBSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128d" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	IF mask[i+63]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="xmm, vm32x, xmm" xed="VGATHERDPD_XMMf64_MEMf64_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256d" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	IF mask[i+63]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:256] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="ymm, vm32x, ymm" xed="VGATHERDPD_YMMf64_MEMf64_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	IF mask[i+31]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="xmm, vm32x, xmm" xed="VGATHERDPS_XMMf32_MEMf32_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	IF mask[i+31]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:256] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="ymm, vm32x, ymm" xed="VGATHERDPS_YMMf32_MEMf32_YMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	IF mask[i+31]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="xmm, vm32x, xmm" xed="VPGATHERDD_XMMu32_MEMd_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	IF mask[i+31]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:256] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="ymm, vm32x, ymm" xed="VPGATHERDD_YMMu32_MEMd_YMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	IF mask[i+63]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="xmm, vm32x, xmm" xed="VPGATHERDQ_XMMu64_MEMq_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	IF mask[i+63]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:256] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="ymm, vm32x, ymm" xed="VPGATHERDQ_YMMu64_MEMq_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128d" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	IF mask[i+63]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="xmm, vm64x, xmm" xed="VGATHERQPD_XMMf64_MEMf64_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="double const*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256d" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	IF mask[i+63]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:256] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="ymm, vm64x, ymm" xed="VGATHERQPD_YMMf64_MEMf64_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF mask[i+31]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:64] := 0
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="xmm, vm64x, xmm" xed="VGATHERQPS_XMMf32_MEMf32_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="float const*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF mask[i+31]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="xmm, vm64y, xmm" xed="VGATHERQPS_XMMf32_MEMf32_XMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF mask[i+31]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:64] := 0
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="xmm, vm64x, xmm" xed="VPGATHERQD_XMMu32_MEMd_XMMi32_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="int const*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF mask[i+31]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="xmm, vm64y, xmm" xed="VPGATHERQD_XMMu32_MEMd_XMMi32_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_mask_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	IF mask[i+63]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:128] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="xmm, vm64x, xmm" xed="VPGATHERQQ_XMMu64_MEMq_XMMi64_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mask_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__int64 const*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	IF mask[i+63]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+mask[MAX:256] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="ymm, vm64x, ymm" xed="VPGATHERQQ_YMMu64_MEMq_YMMi64_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_maskload_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<description>Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF mask[i+31]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMASKMOVD" form="xmm, xmm, m128" xed="VPMASKMOVD_XMMdq_XMMdq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_maskload_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="int const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<description>Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF mask[i+31]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMASKMOVD" form="ymm, ymm, m256" xed="VPMASKMOVD_YMMqq_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_maskload_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64 const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<description>Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF mask[i+63]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMASKMOVQ" form="xmm, xmm, m128" xed="VPMASKMOVQ_XMMdq_XMMdq_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_maskload_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__int64 const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<description>Load packed 64-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF mask[i+63]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMASKMOVQ" form="ymm, ymm, m256" xed="VPMASKMOVQ_YMMqq_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_maskstore_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="int*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF mask[i+31]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMASKMOVD" form="m128, xmm, xmm" xed="VPMASKMOVD_MEMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_maskstore_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="int*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF mask[i+31]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMASKMOVD" form="m256, ymm, ymm" xed="VPMASKMOVD_MEMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_maskstore_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__int64*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__m128i" varname="mask" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF mask[i+63]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMASKMOVQ" form="m128, xmm, xmm" xed="VPMASKMOVQ_MEMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_maskstore_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__int64*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__m256i" varname="mask" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF mask[i+63]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMASKMOVQ" form="m256, ymm, ymm" xed="VPMASKMOVQ_MEMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="ymm, ymm, ymm" xed="VPMAXSB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="ymm, ymm, ymm" xed="VPMAXSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="ymm, ymm, ymm" xed="VPMAXSD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="ymm, ymm, ymm" xed="VPMAXUB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="ymm, ymm, ymm" xed="VPMAXUW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="ymm, ymm, ymm" xed="VPMAXUD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSB" form="ymm, ymm, ymm" xed="VPMINSB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSW" form="ymm, ymm, ymm" xed="VPMINSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSD" form="ymm, ymm, ymm" xed="VPMINSD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUB" form="ymm, ymm, ymm" xed="VPMINUB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUW" form="ymm, ymm, ymm" xed="VPMINUW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUD" form="ymm, ymm, ymm" xed="VPMINUD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_movemask_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[j] := a[i+7]
+ENDFOR
+	</operation>
+	<instruction name="VPMOVMSKB" form="r32, ymm" xed="VPMOVMSKB_GPR32d_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mpsadbw_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
+	Eight SADs are performed for each 128-bit lane using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8".</description>
+	<operation>
+DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
+	a_offset := imm8[2]*32
+	b_offset := imm8[1:0]*32
+	FOR j := 0 to 7
+		i := j*8
+		k := a_offset+i
+		l := b_offset
+		tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \
+		                   ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24]))
+	ENDFOR
+	RETURN tmp[127:0]
+}
+dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])
+dst[255:128] := MPSADBW(a[255:128], b[255:128], imm8[5:3])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMPSADBW" form="ymm, ymm, ymm, imm8" xed="VMPSADBW_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="ymm, ymm, ymm" xed="VPMULDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="ymm, ymm, ymm" xed="VPMULUDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHW" form="ymm, ymm, ymm" xed="VPMULHW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="ymm, ymm, ymm" xed="VPMULHUW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+	dst[i+15:i] := tmp[16:1]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="ymm, ymm, ymm" xed="VPMULHRSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[15:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLW" form="ymm, ymm, ymm" xed="VPMULLW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Multiply the packed signed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	tmp[63:0] := a[i+31:i] * b[i+31:i]
+	dst[i+31:i] := tmp[31:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLD" form="ymm, ymm, ymm" xed="VPMULLD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_or_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise OR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[255:0] := (a[255:0] OR b[255:0])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOR" form="ymm, ymm, ymm" xed="VPOR_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="SI8"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := Saturate8(a[15:0])
+dst[15:8] := Saturate8(a[31:16])
+dst[23:16] := Saturate8(a[47:32])
+dst[31:24] := Saturate8(a[63:48])
+dst[39:32] := Saturate8(a[79:64])
+dst[47:40] := Saturate8(a[95:80])
+dst[55:48] := Saturate8(a[111:96])
+dst[63:56] := Saturate8(a[127:112])
+dst[71:64] := Saturate8(b[15:0])
+dst[79:72] := Saturate8(b[31:16])
+dst[87:80] := Saturate8(b[47:32])
+dst[95:88] := Saturate8(b[63:48])
+dst[103:96] := Saturate8(b[79:64])
+dst[111:104] := Saturate8(b[95:80])
+dst[119:112] := Saturate8(b[111:96])
+dst[127:120] := Saturate8(b[127:112])
+dst[135:128] := Saturate8(a[143:128])
+dst[143:136] := Saturate8(a[159:144])
+dst[151:144] := Saturate8(a[175:160])
+dst[159:152] := Saturate8(a[191:176])
+dst[167:160] := Saturate8(a[207:192])
+dst[175:168] := Saturate8(a[223:208])
+dst[183:176] := Saturate8(a[239:224])
+dst[191:184] := Saturate8(a[255:240])
+dst[199:192] := Saturate8(b[143:128])
+dst[207:200] := Saturate8(b[159:144])
+dst[215:208] := Saturate8(b[175:160])
+dst[223:216] := Saturate8(b[191:176])
+dst[231:224] := Saturate8(b[207:192])
+dst[239:232] := Saturate8(b[223:208])
+dst[247:240] := Saturate8(b[239:224])
+dst[255:248] := Saturate8(b[255:240])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="ymm, ymm, ymm" xed="VPACKSSWB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:0])
+dst[31:16] := Saturate16(a[63:32])
+dst[47:32] := Saturate16(a[95:64])
+dst[63:48] := Saturate16(a[127:96])
+dst[79:64] := Saturate16(b[31:0])
+dst[95:80] := Saturate16(b[63:32])
+dst[111:96] := Saturate16(b[95:64])
+dst[127:112] := Saturate16(b[127:96])
+dst[143:128] := Saturate16(a[159:128])
+dst[159:144] := Saturate16(a[191:160])
+dst[175:160] := Saturate16(a[223:192])
+dst[191:176] := Saturate16(a[255:224])
+dst[207:192] := Saturate16(b[159:128])
+dst[223:208] := Saturate16(b[191:160])
+dst[239:224] := Saturate16(b[223:192])
+dst[255:240] := Saturate16(b[255:224])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="ymm, ymm, ymm" xed="VPACKSSDW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := SaturateU8(a[15:0])
+dst[15:8] := SaturateU8(a[31:16])
+dst[23:16] := SaturateU8(a[47:32])
+dst[31:24] := SaturateU8(a[63:48])
+dst[39:32] := SaturateU8(a[79:64])
+dst[47:40] := SaturateU8(a[95:80])
+dst[55:48] := SaturateU8(a[111:96])
+dst[63:56] := SaturateU8(a[127:112])
+dst[71:64] := SaturateU8(b[15:0])
+dst[79:72] := SaturateU8(b[31:16])
+dst[87:80] := SaturateU8(b[47:32])
+dst[95:88] := SaturateU8(b[63:48])
+dst[103:96] := SaturateU8(b[79:64])
+dst[111:104] := SaturateU8(b[95:80])
+dst[119:112] := SaturateU8(b[111:96])
+dst[127:120] := SaturateU8(b[127:112])
+dst[135:128] := SaturateU8(a[143:128])
+dst[143:136] := SaturateU8(a[159:144])
+dst[151:144] := SaturateU8(a[175:160])
+dst[159:152] := SaturateU8(a[191:176])
+dst[167:160] := SaturateU8(a[207:192])
+dst[175:168] := SaturateU8(a[223:208])
+dst[183:176] := SaturateU8(a[239:224])
+dst[191:184] := SaturateU8(a[255:240])
+dst[199:192] := SaturateU8(b[143:128])
+dst[207:200] := SaturateU8(b[159:144])
+dst[215:208] := SaturateU8(b[175:160])
+dst[223:216] := SaturateU8(b[191:176])
+dst[231:224] := SaturateU8(b[207:192])
+dst[239:232] := SaturateU8(b[223:208])
+dst[247:240] := SaturateU8(b[239:224])
+dst[255:248] := SaturateU8(b[255:240])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="ymm, ymm, ymm" xed="VPACKUSWB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := SaturateU16(a[31:0])
+dst[31:16] := SaturateU16(a[63:32])
+dst[47:32] := SaturateU16(a[95:64])
+dst[63:48] := SaturateU16(a[127:96])
+dst[79:64] := SaturateU16(b[31:0])
+dst[95:80] := SaturateU16(b[63:32])
+dst[111:96] := SaturateU16(b[95:64])
+dst[127:112] := SaturateU16(b[127:96])
+dst[143:128] := SaturateU16(a[159:128])
+dst[159:144] := SaturateU16(a[191:160])
+dst[175:160] := SaturateU16(a[223:192])
+dst[191:176] := SaturateU16(a[255:224])
+dst[207:192] := SaturateU16(b[159:128])
+dst[223:208] := SaturateU16(b[191:160])
+dst[239:224] := SaturateU16(b[223:192])
+dst[255:240] := SaturateU16(b[255:224])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="ymm, ymm, ymm" xed="VPACKUSDW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_permute2x128_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src1, src2, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src1[127:0]
+	1:	tmp[127:0] := src1[255:128]
+	2:	tmp[127:0] := src2[127:0]
+	3:	tmp[127:0] := src2[255:128]
+	ESAC
+	IF control[3]
+		tmp[127:0] := 0
+	FI
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
+dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERM2I128" form="ymm, ymm, ymm, imm8" xed="VPERM2I128_YMMqq_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_permute4x64_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm, ymm, imm8" xed="VPERMQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_permute4x64_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm, ymm, imm8" xed="VPERMPD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_permutevar8x32_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMD" form="ymm, ymm, ymm" xed="VPERMD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_permutevar8x32_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPS" form="ymm, ymm, ymm" xed="VPERMPS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
+ENDFOR
+FOR j := 0 to 3
+	i := j*64
+	dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \
+	               tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56]
+	dst[i+63:i+16] := 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSADBW" form="ymm, ymm, ymm" xed="VPSADBW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="ymm, ymm, imm8" xed="VPSHUFD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF b[i+7] == 1
+		dst[i+7:i] := 0
+	ELSE
+		index[3:0] := b[i+3:i]
+		dst[i+7:i] := a[index*8+7:index*8]
+	FI
+	IF b[128+i+7] == 1
+		dst[128+i+7:128+i] := 0
+	ELSE
+		index[3:0] := b[128+i+3:128+i]
+		dst[128+i+7:128+i] := a[128+index*8+7:128+index*8]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="ymm, ymm, ymm" xed="VPSHUFB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+dst[191:128] := a[191:128]
+dst[207:192] := (a &gt;&gt; (imm8[1:0] * 16))[207:192]
+dst[223:208] := (a &gt;&gt; (imm8[3:2] * 16))[207:192]
+dst[239:224] := (a &gt;&gt; (imm8[5:4] * 16))[207:192]
+dst[255:240] := (a &gt;&gt; (imm8[7:6] * 16))[207:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="ymm, ymm, imm8" xed="VPSHUFHW_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst".</description>
+	<operation>
+dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+dst[127:64] := a[127:64]
+dst[143:128] := (a &gt;&gt; (imm8[1:0] * 16))[143:128]
+dst[159:144] := (a &gt;&gt; (imm8[3:2] * 16))[143:128]
+dst[175:160] := (a &gt;&gt; (imm8[5:4] * 16))[143:128]
+dst[191:176] := (a &gt;&gt; (imm8[7:6] * 16))[143:128]
+dst[255:192] := a[255:192]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="ymm, ymm, imm8" xed="VPSHUFLW_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sign_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Negate packed signed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF b[i+7:i] &lt; 0
+		dst[i+7:i] := -(a[i+7:i])
+	ELSE IF b[i+7:i] == 0
+		dst[i+7:i] := 0
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSIGNB" form="ymm, ymm, ymm" xed="VPSIGNB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sign_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Negate packed signed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF b[i+15:i] &lt; 0
+		dst[i+15:i] := -(a[i+15:i])
+	ELSE IF b[i+15:i] == 0
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSIGNW" form="ymm, ymm, ymm" xed="VPSIGNW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sign_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Negate packed signed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF b[i+31:i] &lt; 0
+		dst[i+31:i] := -(a[i+31:i])
+	ELSE IF b[i+31:i] == 0
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSIGND" form="ymm, ymm, ymm" xed="VPSIGND_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_slli_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &lt;&lt; (tmp*8)
+dst[255:128] := a[255:128] &lt;&lt; (tmp*8)
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLDQ" form="ymm, ymm, imm8" xed="VPSLLDQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_bslli_epi128">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &lt;&lt; (tmp*8)
+dst[255:128] := a[255:128] &lt;&lt; (tmp*8)
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLDQ" form="ymm, ymm, imm8" xed="VPSLLDQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLW" form="ymm, ymm, xmm" xed="VPSLLW_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLW" form="ymm, ymm, imm8" xed="VPSLLW_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLD" form="ymm, ymm, xmm" xed="VPSLLD_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLD" form="ymm, ymm, imm8" xed="VPSLLD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="ymm, ymm, xmm" xed="VPSLLQ_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="ymm, ymm, imm8" xed="VPSLLQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="xmm, xmm, xmm" xed="VPSLLVD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="ymm, ymm, ymm" xed="VPSLLVD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="xmm, xmm, xmm" xed="VPSLLVQ_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="ymm, ymm, ymm" xed="VPSLLVQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAW" form="ymm, ymm, xmm" xed="VPSRAW_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAW" form="ymm, ymm, imm8" xed="VPSRAW_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAD" form="ymm, ymm, xmm" xed="VPSRAD_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAD" form="ymm, ymm, imm8" xed="VPSRAD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="xmm, xmm, xmm" xed="VPSRAVD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="ymm, ymm, ymm" xed="VPSRAVD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srli_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &gt;&gt; (tmp*8)
+dst[255:128] := a[255:128] &gt;&gt; (tmp*8)
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLDQ" form="ymm, ymm, imm8" xed="VPSRLDQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_bsrli_epi128">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &gt;&gt; (tmp*8)
+dst[255:128] := a[255:128] &gt;&gt; (tmp*8)
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLDQ" form="ymm, ymm, imm8" xed="VPSRLDQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLW" form="ymm, ymm, xmm" xed="VPSRLW_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLW" form="ymm, ymm, imm8" xed="VPSRLW_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLD" form="ymm, ymm, xmm" xed="VPSRLD_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLD" form="ymm, ymm, imm8" xed="VPSRLD_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="ymm, ymm, xmm" xed="VPSRLQ_YMMqq_YMMqq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="ymm, ymm, imm8" xed="VPSRLQ_YMMqq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="xmm, xmm, xmm" xed="VPSRLVD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="ymm, ymm, ymm" xed="VPSRLVD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="xmm, xmm, xmm" xed="VPSRLVQ_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="ymm, ymm, ymm" xed="VPSRLVQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_stream_load_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i const*" varname="mem_addr" etype="M256" memwidth="256"/>
+	<description>Load 256-bits of integer data from memory into "dst" using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVNTDQA" form="ymm, m256" xed="VMOVNTDQA_YMMqq_MEMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := a[i+7:i] - b[i+7:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBB" form="ymm, ymm, ymm" xed="VPSUBB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := a[i+15:i] - b[i+15:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBW" form="ymm, ymm, ymm" xed="VPSUBW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBD" form="ymm, ymm, ymm" xed="VPSUBD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="ymm, ymm, ymm" xed="VPSUBQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="ymm, ymm, ymm" xed="VPSUBSB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="ymm, ymm, ymm" xed="VPSUBSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="ymm, ymm, ymm" xed="VPSUBUSB_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="ymm, ymm, ymm" xed="VPSUBUSW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_xor_si256">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m256i" varname="a" etype="M256"/>
+	<parameter type="__m256i" varname="b" etype="M256"/>
+	<description>Compute the bitwise XOR of 256 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[255:0] := (a[255:0] XOR b[255:0])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXOR" form="ymm, ymm, ymm" xed="VPXOR_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="ymm, ymm, ymm" xed="VPUNPCKHBW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="ymm, ymm, ymm" xed="VPUNPCKHWD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="ymm, ymm, ymm" xed="VPUNPCKHDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="ymm, ymm, ymm" xed="VPUNPCKHQDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="ymm, ymm, ymm" xed="VPUNPCKLBW_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="ymm, ymm, ymm" xed="VPUNPCKLWD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="ymm, ymm, ymm" xed="VPUNPCKLDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX2" name="_mm256_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="ymm, ymm, ymm" xed="VPUNPCKLQDQ_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kunpackd">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask64" varname="dst" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Unpack and interleave 32 bits from masks "a" and "b", and store the 64-bit result in "dst".</description>
+	<operation>
+dst[31:0] := b[31:0]
+dst[63:32] := a[31:0]
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="KUNPCKDQ" form="k, k, k" xed="KUNPCKDQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kunpackw">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask32" varname="dst" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Unpack and interleave 16 bits from masks "a" and "b", and store the 32-bit result in "dst".</description>
+	<operation>
+dst[15:0] := b[15:0]
+dst[31:16] := a[15:0]
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="KUNPCKWD" form="k, k, k" xed="KUNPCKWD_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+FOR i := 0 to 1
+	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 3
+	i := j*64
+	dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	               ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                  ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                  ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                  ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="ymm, ymm, ymm, imm8" xed="VDBPSADBW_YMMu16_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+FOR i := 0 to 1
+	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 3
+	i := j*64
+	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="ymm {k}, ymm, ymm, imm8" xed="VDBPSADBW_YMMu16_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+FOR i := 0 to 1
+	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 3
+	i := j*64
+	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="ymm {z}, ymm, ymm, imm8" xed="VDBPSADBW_YMMu16_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+FOR i := 0 to 3
+	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 7
+	i := j*64
+	dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	               ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                  ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                  ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                  ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="zmm, zmm, zmm, imm8" xed="VDBPSADBW_ZMMu16_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+FOR i := 0 to 3
+	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 7
+	i := j*64
+	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="zmm {k}, zmm, zmm, imm8" xed="VDBPSADBW_ZMMu16_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected from within 128-bit lanes according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+FOR i := 0 to 3
+	tmp.m128[i].dword[0] := b.m128[i].dword[ imm8[1:0] ]
+	tmp.m128[i].dword[1] := b.m128[i].dword[ imm8[3:2] ]
+	tmp.m128[i].dword[2] := b.m128[i].dword[ imm8[5:4] ]
+	tmp.m128[i].dword[3] := b.m128[i].dword[ imm8[7:6] ]
+ENDFOR
+FOR j := 0 to 7
+	i := j*64
+	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="zmm {z}, zmm, zmm, imm8" xed="VDBPSADBW_ZMMu16_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+tmp.dword[0] := b.dword[ imm8[1:0] ]
+tmp.dword[1] := b.dword[ imm8[3:2] ]
+tmp.dword[2] := b.dword[ imm8[5:4] ]
+tmp.dword[3] := b.dword[ imm8[7:6] ]
+FOR j := 0 to 1
+	i := j*64
+	dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	               ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                  ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                  ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                  ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="xmm, xmm, xmm, imm8" xed="VDBPSADBW_XMMu16_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+tmp.dword[0] := b.dword[ imm8[1:0] ]
+tmp.dword[1] := b.dword[ imm8[3:2] ]
+tmp.dword[2] := b.dword[ imm8[5:4] ]
+tmp.dword[3] := b.dword[ imm8[7:6] ]
+FOR j := 0 to 1
+	i := j*64
+	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="xmm {k}, xmm, xmm, imm8" xed="VDBPSADBW_XMMu16_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_dbsad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from "a", and the last two SADs use the upper 8-bit quadruplet of the lane from "a". Quadruplets from "b" are selected according to the control in "imm8", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.</description>
+	<operation>
+tmp.dword[0] := b.dword[ imm8[1:0] ]
+tmp.dword[1] := b.dword[ imm8[3:2] ]
+tmp.dword[2] := b.dword[ imm8[5:4] ]
+tmp.dword[3] := b.dword[ imm8[7:6] ]
+FOR j := 0 to 1
+	i := j*64
+	tmp_dst[i+15:i] := ABS(a[i+7:i] - tmp[i+7:i]) + ABS(a[i+15:i+8] - tmp[i+15:i+8]) +\
+	                   ABS(a[i+23:i+16] - tmp[i+23:i+16]) + ABS(a[i+31:i+24] - tmp[i+31:i+24])
+	
+	tmp_dst[i+31:i+16] := ABS(a[i+7:i] - tmp[i+15:i+8]) + ABS(a[i+15:i+8] - tmp[i+23:i+16]) +\
+	                      ABS(a[i+23:i+16] - tmp[i+31:i+24]) + ABS(a[i+31:i+24] - tmp[i+39:i+32])
+	
+	tmp_dst[i+47:i+32] := ABS(a[i+39:i+32] - tmp[i+23:i+16]) + ABS(a[i+47:i+40] - tmp[i+31:i+24]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+39:i+32]) + ABS(a[i+63:i+56] - tmp[i+47:i+40])
+	
+	tmp_dst[i+63:i+48] := ABS(a[i+39:i+32] - tmp[i+31:i+24]) + ABS(a[i+47:i+40] - tmp[i+39:i+32]) +\
+	                      ABS(a[i+55:i+48] - tmp[i+47:i+40]) + ABS(a[i+63:i+56] - tmp[i+55:i+48])
+ENDFOR
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDBPSADBW" form="xmm {z}, xmm, xmm, imm8" xed="VDBPSADBW_XMMu16_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<description>Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="ymm {k}, m256" xed="VMOVDQU16_YMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mov_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="ymm {k}, ymm" xed="VMOVDQU16_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Store packed 16-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU16" form="m256 {k}, ymm" xed="VMOVDQU16_MEMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<description>Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="ymm {z}, m256" xed="VMOVDQU16_YMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mov_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="ymm {z}, ymm" xed="VMOVDQU16_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<description>Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="zmm {k}, m512" xed="VMOVDQU16_ZMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mov_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="zmm {k}, zmm" xed="VMOVDQU16_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Store packed 16-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU16" form="m512 {k}, zmm" xed="VMOVDQU16_MEMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<description>Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="zmm {z}, m512" xed="VMOVDQU16_ZMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mov_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="zmm {z}, zmm" xed="VMOVDQU16_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<description>Load packed 16-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="xmm {k}, m128" xed="VMOVDQU16_XMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mov_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Move packed 16-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="xmm {k}, xmm" xed="VMOVDQU16_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Store packed 16-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU16" form="m128 {k}, xmm" xed="VMOVDQU16_MEMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<description>Load packed 16-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+i+15:mem_addr+i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="xmm {z}, m128" xed="VMOVDQU16_XMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mov_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Move packed 16-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="xmm {z}, xmm" xed="VMOVDQU16_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<description>Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="ymm {k}, m256" xed="VMOVDQU8_YMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mov_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="ymm {k}, ymm" xed="VMOVDQU8_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Store packed 8-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU8" form="m256 {k}, ymm" xed="VMOVDQU8_MEMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<description>Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="ymm {z}, m256" xed="VMOVDQU8_YMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mov_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="ymm {z}, ymm" xed="VMOVDQU8_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<description>Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="zmm {k}, m512" xed="VMOVDQU8_ZMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mov_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="zmm {k}, zmm" xed="VMOVDQU8_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Store packed 8-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU8" form="m512 {k}, zmm" xed="VMOVDQU8_MEMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<description>Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="zmm {z}, m512" xed="VMOVDQU8_ZMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mov_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="zmm {z}, zmm" xed="VMOVDQU8_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<description>Load packed 8-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="xmm {k}, m128" xed="VMOVDQU8_XMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mov_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Move packed 8-bit integers from "a" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="xmm {k}, xmm" xed="VMOVDQU8_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Store packed 8-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU8" form="m128 {k}, xmm" xed="VMOVDQU8_MEMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<description>Load packed 8-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+i+7:mem_addr+i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="xmm {z}, m128" xed="VMOVDQU8_XMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mov_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Move packed 8-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="xmm {z}, xmm" xed="VMOVDQU8_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := ABS(a[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSB" form="ymm {k}, ymm" xed="VPABSB_YMMi8_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := ABS(a[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSB" form="ymm {z}, ymm" xed="VPABSB_YMMi8_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := ABS(a[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSB" form="zmm, zmm" xed="VPABSB_ZMMi8_MASKmskw_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := ABS(a[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSB" form="zmm {k}, zmm" xed="VPABSB_ZMMi8_MASKmskw_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := ABS(a[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSB" form="zmm {z}, zmm" xed="VPABSB_ZMMi8_MASKmskw_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := ABS(a[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSB" form="xmm {k}, xmm" xed="VPABSB_XMMi8_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_abs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := ABS(a[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSB" form="xmm {z}, xmm" xed="VPABSB_XMMi8_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ABS(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSW" form="ymm {k}, ymm" xed="VPABSW_YMMi16_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ABS(a[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSW" form="ymm {z}, ymm" xed="VPABSW_YMMi16_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := ABS(a[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSW" form="zmm, zmm" xed="VPABSW_ZMMi16_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ABS(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSW" form="zmm {k}, zmm" xed="VPABSW_ZMMi16_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ABS(a[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSW" form="zmm {z}, zmm" xed="VPABSW_ZMMi16_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ABS(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSW" form="xmm {k}, xmm" xed="VPABSW_XMMi16_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_abs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ABS(a[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSW" form="xmm {z}, xmm" xed="VPABSW_XMMi16_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="src" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := Saturate16(a[31:0])
+tmp_dst[31:16] := Saturate16(a[63:32])
+tmp_dst[47:32] := Saturate16(a[95:64])
+tmp_dst[63:48] := Saturate16(a[127:96])
+tmp_dst[79:64] := Saturate16(b[31:0])
+tmp_dst[95:80] := Saturate16(b[63:32])
+tmp_dst[111:96] := Saturate16(b[95:64])
+tmp_dst[127:112] := Saturate16(b[127:96])
+tmp_dst[143:128] := Saturate16(a[159:128])
+tmp_dst[159:144] := Saturate16(a[191:160])
+tmp_dst[175:160] := Saturate16(a[223:192])
+tmp_dst[191:176] := Saturate16(a[255:224])
+tmp_dst[207:192] := Saturate16(b[159:128])
+tmp_dst[223:208] := Saturate16(b[191:160])
+tmp_dst[239:224] := Saturate16(b[223:192])
+tmp_dst[255:240] := Saturate16(b[255:224])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="ymm {k}, ymm, ymm" xed="VPACKSSDW_YMMi16_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := Saturate16(a[31:0])
+tmp_dst[31:16] := Saturate16(a[63:32])
+tmp_dst[47:32] := Saturate16(a[95:64])
+tmp_dst[63:48] := Saturate16(a[127:96])
+tmp_dst[79:64] := Saturate16(b[31:0])
+tmp_dst[95:80] := Saturate16(b[63:32])
+tmp_dst[111:96] := Saturate16(b[95:64])
+tmp_dst[127:112] := Saturate16(b[127:96])
+tmp_dst[143:128] := Saturate16(a[159:128])
+tmp_dst[159:144] := Saturate16(a[191:160])
+tmp_dst[175:160] := Saturate16(a[223:192])
+tmp_dst[191:176] := Saturate16(a[255:224])
+tmp_dst[207:192] := Saturate16(b[159:128])
+tmp_dst[223:208] := Saturate16(b[191:160])
+tmp_dst[239:224] := Saturate16(b[223:192])
+tmp_dst[255:240] := Saturate16(b[255:224])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="ymm {z}, ymm, ymm" xed="VPACKSSDW_YMMi16_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="src" etype="SI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := Saturate16(a[31:0])
+tmp_dst[31:16] := Saturate16(a[63:32])
+tmp_dst[47:32] := Saturate16(a[95:64])
+tmp_dst[63:48] := Saturate16(a[127:96])
+tmp_dst[79:64] := Saturate16(b[31:0])
+tmp_dst[95:80] := Saturate16(b[63:32])
+tmp_dst[111:96] := Saturate16(b[95:64])
+tmp_dst[127:112] := Saturate16(b[127:96])
+tmp_dst[143:128] := Saturate16(a[159:128])
+tmp_dst[159:144] := Saturate16(a[191:160])
+tmp_dst[175:160] := Saturate16(a[223:192])
+tmp_dst[191:176] := Saturate16(a[255:224])
+tmp_dst[207:192] := Saturate16(b[159:128])
+tmp_dst[223:208] := Saturate16(b[191:160])
+tmp_dst[239:224] := Saturate16(b[223:192])
+tmp_dst[255:240] := Saturate16(b[255:224])
+tmp_dst[271:256] := Saturate16(a[287:256])
+tmp_dst[287:272] := Saturate16(a[319:288])
+tmp_dst[303:288] := Saturate16(a[351:320])
+tmp_dst[319:304] := Saturate16(a[383:352])
+tmp_dst[335:320] := Saturate16(b[287:256])
+tmp_dst[351:336] := Saturate16(b[319:288])
+tmp_dst[367:352] := Saturate16(b[351:320])
+tmp_dst[383:368] := Saturate16(b[383:352])
+tmp_dst[399:384] := Saturate16(a[415:384])
+tmp_dst[415:400] := Saturate16(a[447:416])
+tmp_dst[431:416] := Saturate16(a[479:448])
+tmp_dst[447:432] := Saturate16(a[511:480])
+tmp_dst[463:448] := Saturate16(b[415:384])
+tmp_dst[479:464] := Saturate16(b[447:416])
+tmp_dst[495:480] := Saturate16(b[479:448])
+tmp_dst[511:496] := Saturate16(b[511:480])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="zmm {k}, zmm, zmm" xed="VPACKSSDW_ZMMi16_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := Saturate16(a[31:0])
+tmp_dst[31:16] := Saturate16(a[63:32])
+tmp_dst[47:32] := Saturate16(a[95:64])
+tmp_dst[63:48] := Saturate16(a[127:96])
+tmp_dst[79:64] := Saturate16(b[31:0])
+tmp_dst[95:80] := Saturate16(b[63:32])
+tmp_dst[111:96] := Saturate16(b[95:64])
+tmp_dst[127:112] := Saturate16(b[127:96])
+tmp_dst[143:128] := Saturate16(a[159:128])
+tmp_dst[159:144] := Saturate16(a[191:160])
+tmp_dst[175:160] := Saturate16(a[223:192])
+tmp_dst[191:176] := Saturate16(a[255:224])
+tmp_dst[207:192] := Saturate16(b[159:128])
+tmp_dst[223:208] := Saturate16(b[191:160])
+tmp_dst[239:224] := Saturate16(b[223:192])
+tmp_dst[255:240] := Saturate16(b[255:224])
+tmp_dst[271:256] := Saturate16(a[287:256])
+tmp_dst[287:272] := Saturate16(a[319:288])
+tmp_dst[303:288] := Saturate16(a[351:320])
+tmp_dst[319:304] := Saturate16(a[383:352])
+tmp_dst[335:320] := Saturate16(b[287:256])
+tmp_dst[351:336] := Saturate16(b[319:288])
+tmp_dst[367:352] := Saturate16(b[351:320])
+tmp_dst[383:368] := Saturate16(b[383:352])
+tmp_dst[399:384] := Saturate16(a[415:384])
+tmp_dst[415:400] := Saturate16(a[447:416])
+tmp_dst[431:416] := Saturate16(a[479:448])
+tmp_dst[447:432] := Saturate16(a[511:480])
+tmp_dst[463:448] := Saturate16(b[415:384])
+tmp_dst[479:464] := Saturate16(b[447:416])
+tmp_dst[495:480] := Saturate16(b[479:448])
+tmp_dst[511:496] := Saturate16(b[511:480])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="zmm {z}, zmm, zmm" xed="VPACKSSDW_ZMMi16_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:0])
+dst[31:16] := Saturate16(a[63:32])
+dst[47:32] := Saturate16(a[95:64])
+dst[63:48] := Saturate16(a[127:96])
+dst[79:64] := Saturate16(b[31:0])
+dst[95:80] := Saturate16(b[63:32])
+dst[111:96] := Saturate16(b[95:64])
+dst[127:112] := Saturate16(b[127:96])
+dst[143:128] := Saturate16(a[159:128])
+dst[159:144] := Saturate16(a[191:160])
+dst[175:160] := Saturate16(a[223:192])
+dst[191:176] := Saturate16(a[255:224])
+dst[207:192] := Saturate16(b[159:128])
+dst[223:208] := Saturate16(b[191:160])
+dst[239:224] := Saturate16(b[223:192])
+dst[255:240] := Saturate16(b[255:224])
+dst[271:256] := Saturate16(a[287:256])
+dst[287:272] := Saturate16(a[319:288])
+dst[303:288] := Saturate16(a[351:320])
+dst[319:304] := Saturate16(a[383:352])
+dst[335:320] := Saturate16(b[287:256])
+dst[351:336] := Saturate16(b[319:288])
+dst[367:352] := Saturate16(b[351:320])
+dst[383:368] := Saturate16(b[383:352])
+dst[399:384] := Saturate16(a[415:384])
+dst[415:400] := Saturate16(a[447:416])
+dst[431:416] := Saturate16(a[479:448])
+dst[447:432] := Saturate16(a[511:480])
+dst[463:448] := Saturate16(b[415:384])
+dst[479:464] := Saturate16(b[447:416])
+dst[495:480] := Saturate16(b[479:448])
+dst[511:496] := Saturate16(b[511:480])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="zmm, zmm, zmm" xed="VPACKSSDW_ZMMi16_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := Saturate16(a[31:0])
+tmp_dst[31:16] := Saturate16(a[63:32])
+tmp_dst[47:32] := Saturate16(a[95:64])
+tmp_dst[63:48] := Saturate16(a[127:96])
+tmp_dst[79:64] := Saturate16(b[31:0])
+tmp_dst[95:80] := Saturate16(b[63:32])
+tmp_dst[111:96] := Saturate16(b[95:64])
+tmp_dst[127:112] := Saturate16(b[127:96])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="xmm {k}, xmm, xmm" xed="VPACKSSDW_XMMi16_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_packs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := Saturate16(a[31:0])
+tmp_dst[31:16] := Saturate16(a[63:32])
+tmp_dst[47:32] := Saturate16(a[95:64])
+tmp_dst[63:48] := Saturate16(a[127:96])
+tmp_dst[79:64] := Saturate16(b[31:0])
+tmp_dst[95:80] := Saturate16(b[63:32])
+tmp_dst[111:96] := Saturate16(b[95:64])
+tmp_dst[127:112] := Saturate16(b[127:96])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKSSDW" form="xmm {z}, xmm, xmm" xed="VPACKSSDW_XMMi16_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="SI8"/>
+	<parameter type="__m256i" varname="src" etype="SI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := Saturate8(a[15:0])
+tmp_dst[15:8] := Saturate8(a[31:16])
+tmp_dst[23:16] := Saturate8(a[47:32])
+tmp_dst[31:24] := Saturate8(a[63:48])
+tmp_dst[39:32] := Saturate8(a[79:64])
+tmp_dst[47:40] := Saturate8(a[95:80])
+tmp_dst[55:48] := Saturate8(a[111:96])
+tmp_dst[63:56] := Saturate8(a[127:112])
+tmp_dst[71:64] := Saturate8(b[15:0])
+tmp_dst[79:72] := Saturate8(b[31:16])
+tmp_dst[87:80] := Saturate8(b[47:32])
+tmp_dst[95:88] := Saturate8(b[63:48])
+tmp_dst[103:96] := Saturate8(b[79:64])
+tmp_dst[111:104] := Saturate8(b[95:80])
+tmp_dst[119:112] := Saturate8(b[111:96])
+tmp_dst[127:120] := Saturate8(b[127:112])
+tmp_dst[135:128] := Saturate8(a[143:128])
+tmp_dst[143:136] := Saturate8(a[159:144])
+tmp_dst[151:144] := Saturate8(a[175:160])
+tmp_dst[159:152] := Saturate8(a[191:176])
+tmp_dst[167:160] := Saturate8(a[207:192])
+tmp_dst[175:168] := Saturate8(a[223:208])
+tmp_dst[183:176] := Saturate8(a[239:224])
+tmp_dst[191:184] := Saturate8(a[255:240])
+tmp_dst[199:192] := Saturate8(b[143:128])
+tmp_dst[207:200] := Saturate8(b[159:144])
+tmp_dst[215:208] := Saturate8(b[175:160])
+tmp_dst[223:216] := Saturate8(b[191:176])
+tmp_dst[231:224] := Saturate8(b[207:192])
+tmp_dst[239:232] := Saturate8(b[223:208])
+tmp_dst[247:240] := Saturate8(b[239:224])
+tmp_dst[255:248] := Saturate8(b[255:240])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="ymm {k}, ymm, ymm" xed="VPACKSSWB_YMMi8_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := Saturate8(a[15:0])
+tmp_dst[15:8] := Saturate8(a[31:16])
+tmp_dst[23:16] := Saturate8(a[47:32])
+tmp_dst[31:24] := Saturate8(a[63:48])
+tmp_dst[39:32] := Saturate8(a[79:64])
+tmp_dst[47:40] := Saturate8(a[95:80])
+tmp_dst[55:48] := Saturate8(a[111:96])
+tmp_dst[63:56] := Saturate8(a[127:112])
+tmp_dst[71:64] := Saturate8(b[15:0])
+tmp_dst[79:72] := Saturate8(b[31:16])
+tmp_dst[87:80] := Saturate8(b[47:32])
+tmp_dst[95:88] := Saturate8(b[63:48])
+tmp_dst[103:96] := Saturate8(b[79:64])
+tmp_dst[111:104] := Saturate8(b[95:80])
+tmp_dst[119:112] := Saturate8(b[111:96])
+tmp_dst[127:120] := Saturate8(b[127:112])
+tmp_dst[135:128] := Saturate8(a[143:128])
+tmp_dst[143:136] := Saturate8(a[159:144])
+tmp_dst[151:144] := Saturate8(a[175:160])
+tmp_dst[159:152] := Saturate8(a[191:176])
+tmp_dst[167:160] := Saturate8(a[207:192])
+tmp_dst[175:168] := Saturate8(a[223:208])
+tmp_dst[183:176] := Saturate8(a[239:224])
+tmp_dst[191:184] := Saturate8(a[255:240])
+tmp_dst[199:192] := Saturate8(b[143:128])
+tmp_dst[207:200] := Saturate8(b[159:144])
+tmp_dst[215:208] := Saturate8(b[175:160])
+tmp_dst[223:216] := Saturate8(b[191:176])
+tmp_dst[231:224] := Saturate8(b[207:192])
+tmp_dst[239:232] := Saturate8(b[223:208])
+tmp_dst[247:240] := Saturate8(b[239:224])
+tmp_dst[255:248] := Saturate8(b[255:240])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="ymm {z}, ymm, ymm" xed="VPACKSSWB_YMMi8_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="SI8"/>
+	<parameter type="__m512i" varname="src" etype="SI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := Saturate8(a[15:0])
+tmp_dst[15:8] := Saturate8(a[31:16])
+tmp_dst[23:16] := Saturate8(a[47:32])
+tmp_dst[31:24] := Saturate8(a[63:48])
+tmp_dst[39:32] := Saturate8(a[79:64])
+tmp_dst[47:40] := Saturate8(a[95:80])
+tmp_dst[55:48] := Saturate8(a[111:96])
+tmp_dst[63:56] := Saturate8(a[127:112])
+tmp_dst[71:64] := Saturate8(b[15:0])
+tmp_dst[79:72] := Saturate8(b[31:16])
+tmp_dst[87:80] := Saturate8(b[47:32])
+tmp_dst[95:88] := Saturate8(b[63:48])
+tmp_dst[103:96] := Saturate8(b[79:64])
+tmp_dst[111:104] := Saturate8(b[95:80])
+tmp_dst[119:112] := Saturate8(b[111:96])
+tmp_dst[127:120] := Saturate8(b[127:112])
+tmp_dst[135:128] := Saturate8(a[143:128])
+tmp_dst[143:136] := Saturate8(a[159:144])
+tmp_dst[151:144] := Saturate8(a[175:160])
+tmp_dst[159:152] := Saturate8(a[191:176])
+tmp_dst[167:160] := Saturate8(a[207:192])
+tmp_dst[175:168] := Saturate8(a[223:208])
+tmp_dst[183:176] := Saturate8(a[239:224])
+tmp_dst[191:184] := Saturate8(a[255:240])
+tmp_dst[199:192] := Saturate8(b[143:128])
+tmp_dst[207:200] := Saturate8(b[159:144])
+tmp_dst[215:208] := Saturate8(b[175:160])
+tmp_dst[223:216] := Saturate8(b[191:176])
+tmp_dst[231:224] := Saturate8(b[207:192])
+tmp_dst[239:232] := Saturate8(b[223:208])
+tmp_dst[247:240] := Saturate8(b[239:224])
+tmp_dst[255:248] := Saturate8(b[255:240])
+tmp_dst[263:256] := Saturate8(a[271:256])
+tmp_dst[271:264] := Saturate8(a[287:272])
+tmp_dst[279:272] := Saturate8(a[303:288])
+tmp_dst[287:280] := Saturate8(a[319:304])
+tmp_dst[295:288] := Saturate8(a[335:320])
+tmp_dst[303:296] := Saturate8(a[351:336])
+tmp_dst[311:304] := Saturate8(a[367:352])
+tmp_dst[319:312] := Saturate8(a[383:368])
+tmp_dst[327:320] := Saturate8(b[271:256])
+tmp_dst[335:328] := Saturate8(b[287:272])
+tmp_dst[343:336] := Saturate8(b[303:288])
+tmp_dst[351:344] := Saturate8(b[319:304])
+tmp_dst[359:352] := Saturate8(b[335:320])
+tmp_dst[367:360] := Saturate8(b[351:336])
+tmp_dst[375:368] := Saturate8(b[367:352])
+tmp_dst[383:376] := Saturate8(b[383:368])
+tmp_dst[391:384] := Saturate8(a[399:384])
+tmp_dst[399:392] := Saturate8(a[415:400])
+tmp_dst[407:400] := Saturate8(a[431:416])
+tmp_dst[415:408] := Saturate8(a[447:432])
+tmp_dst[423:416] := Saturate8(a[463:448])
+tmp_dst[431:424] := Saturate8(a[479:464])
+tmp_dst[439:432] := Saturate8(a[495:480])
+tmp_dst[447:440] := Saturate8(a[511:496])
+tmp_dst[455:448] := Saturate8(b[399:384])
+tmp_dst[463:456] := Saturate8(b[415:400])
+tmp_dst[471:464] := Saturate8(b[431:416])
+tmp_dst[479:472] := Saturate8(b[447:432])
+tmp_dst[487:480] := Saturate8(b[463:448])
+tmp_dst[495:488] := Saturate8(b[479:464])
+tmp_dst[503:496] := Saturate8(b[495:480])
+tmp_dst[511:504] := Saturate8(b[511:496])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="zmm {k}, zmm, zmm" xed="VPACKSSWB_ZMMi8_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := Saturate8(a[15:0])
+tmp_dst[15:8] := Saturate8(a[31:16])
+tmp_dst[23:16] := Saturate8(a[47:32])
+tmp_dst[31:24] := Saturate8(a[63:48])
+tmp_dst[39:32] := Saturate8(a[79:64])
+tmp_dst[47:40] := Saturate8(a[95:80])
+tmp_dst[55:48] := Saturate8(a[111:96])
+tmp_dst[63:56] := Saturate8(a[127:112])
+tmp_dst[71:64] := Saturate8(b[15:0])
+tmp_dst[79:72] := Saturate8(b[31:16])
+tmp_dst[87:80] := Saturate8(b[47:32])
+tmp_dst[95:88] := Saturate8(b[63:48])
+tmp_dst[103:96] := Saturate8(b[79:64])
+tmp_dst[111:104] := Saturate8(b[95:80])
+tmp_dst[119:112] := Saturate8(b[111:96])
+tmp_dst[127:120] := Saturate8(b[127:112])
+tmp_dst[135:128] := Saturate8(a[143:128])
+tmp_dst[143:136] := Saturate8(a[159:144])
+tmp_dst[151:144] := Saturate8(a[175:160])
+tmp_dst[159:152] := Saturate8(a[191:176])
+tmp_dst[167:160] := Saturate8(a[207:192])
+tmp_dst[175:168] := Saturate8(a[223:208])
+tmp_dst[183:176] := Saturate8(a[239:224])
+tmp_dst[191:184] := Saturate8(a[255:240])
+tmp_dst[199:192] := Saturate8(b[143:128])
+tmp_dst[207:200] := Saturate8(b[159:144])
+tmp_dst[215:208] := Saturate8(b[175:160])
+tmp_dst[223:216] := Saturate8(b[191:176])
+tmp_dst[231:224] := Saturate8(b[207:192])
+tmp_dst[239:232] := Saturate8(b[223:208])
+tmp_dst[247:240] := Saturate8(b[239:224])
+tmp_dst[255:248] := Saturate8(b[255:240])
+tmp_dst[263:256] := Saturate8(a[271:256])
+tmp_dst[271:264] := Saturate8(a[287:272])
+tmp_dst[279:272] := Saturate8(a[303:288])
+tmp_dst[287:280] := Saturate8(a[319:304])
+tmp_dst[295:288] := Saturate8(a[335:320])
+tmp_dst[303:296] := Saturate8(a[351:336])
+tmp_dst[311:304] := Saturate8(a[367:352])
+tmp_dst[319:312] := Saturate8(a[383:368])
+tmp_dst[327:320] := Saturate8(b[271:256])
+tmp_dst[335:328] := Saturate8(b[287:272])
+tmp_dst[343:336] := Saturate8(b[303:288])
+tmp_dst[351:344] := Saturate8(b[319:304])
+tmp_dst[359:352] := Saturate8(b[335:320])
+tmp_dst[367:360] := Saturate8(b[351:336])
+tmp_dst[375:368] := Saturate8(b[367:352])
+tmp_dst[383:376] := Saturate8(b[383:368])
+tmp_dst[391:384] := Saturate8(a[399:384])
+tmp_dst[399:392] := Saturate8(a[415:400])
+tmp_dst[407:400] := Saturate8(a[431:416])
+tmp_dst[415:408] := Saturate8(a[447:432])
+tmp_dst[423:416] := Saturate8(a[463:448])
+tmp_dst[431:424] := Saturate8(a[479:464])
+tmp_dst[439:432] := Saturate8(a[495:480])
+tmp_dst[447:440] := Saturate8(a[511:496])
+tmp_dst[455:448] := Saturate8(b[399:384])
+tmp_dst[463:456] := Saturate8(b[415:400])
+tmp_dst[471:464] := Saturate8(b[431:416])
+tmp_dst[479:472] := Saturate8(b[447:432])
+tmp_dst[487:480] := Saturate8(b[463:448])
+tmp_dst[495:488] := Saturate8(b[479:464])
+tmp_dst[503:496] := Saturate8(b[495:480])
+tmp_dst[511:504] := Saturate8(b[511:496])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="zmm {z}, zmm, zmm" xed="VPACKSSWB_ZMMi8_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="SI8"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := Saturate8(a[15:0])
+dst[15:8] := Saturate8(a[31:16])
+dst[23:16] := Saturate8(a[47:32])
+dst[31:24] := Saturate8(a[63:48])
+dst[39:32] := Saturate8(a[79:64])
+dst[47:40] := Saturate8(a[95:80])
+dst[55:48] := Saturate8(a[111:96])
+dst[63:56] := Saturate8(a[127:112])
+dst[71:64] := Saturate8(b[15:0])
+dst[79:72] := Saturate8(b[31:16])
+dst[87:80] := Saturate8(b[47:32])
+dst[95:88] := Saturate8(b[63:48])
+dst[103:96] := Saturate8(b[79:64])
+dst[111:104] := Saturate8(b[95:80])
+dst[119:112] := Saturate8(b[111:96])
+dst[127:120] := Saturate8(b[127:112])
+dst[135:128] := Saturate8(a[143:128])
+dst[143:136] := Saturate8(a[159:144])
+dst[151:144] := Saturate8(a[175:160])
+dst[159:152] := Saturate8(a[191:176])
+dst[167:160] := Saturate8(a[207:192])
+dst[175:168] := Saturate8(a[223:208])
+dst[183:176] := Saturate8(a[239:224])
+dst[191:184] := Saturate8(a[255:240])
+dst[199:192] := Saturate8(b[143:128])
+dst[207:200] := Saturate8(b[159:144])
+dst[215:208] := Saturate8(b[175:160])
+dst[223:216] := Saturate8(b[191:176])
+dst[231:224] := Saturate8(b[207:192])
+dst[239:232] := Saturate8(b[223:208])
+dst[247:240] := Saturate8(b[239:224])
+dst[255:248] := Saturate8(b[255:240])
+dst[263:256] := Saturate8(a[271:256])
+dst[271:264] := Saturate8(a[287:272])
+dst[279:272] := Saturate8(a[303:288])
+dst[287:280] := Saturate8(a[319:304])
+dst[295:288] := Saturate8(a[335:320])
+dst[303:296] := Saturate8(a[351:336])
+dst[311:304] := Saturate8(a[367:352])
+dst[319:312] := Saturate8(a[383:368])
+dst[327:320] := Saturate8(b[271:256])
+dst[335:328] := Saturate8(b[287:272])
+dst[343:336] := Saturate8(b[303:288])
+dst[351:344] := Saturate8(b[319:304])
+dst[359:352] := Saturate8(b[335:320])
+dst[367:360] := Saturate8(b[351:336])
+dst[375:368] := Saturate8(b[367:352])
+dst[383:376] := Saturate8(b[383:368])
+dst[391:384] := Saturate8(a[399:384])
+dst[399:392] := Saturate8(a[415:400])
+dst[407:400] := Saturate8(a[431:416])
+dst[415:408] := Saturate8(a[447:432])
+dst[423:416] := Saturate8(a[463:448])
+dst[431:424] := Saturate8(a[479:464])
+dst[439:432] := Saturate8(a[495:480])
+dst[447:440] := Saturate8(a[511:496])
+dst[455:448] := Saturate8(b[399:384])
+dst[463:456] := Saturate8(b[415:400])
+dst[471:464] := Saturate8(b[431:416])
+dst[479:472] := Saturate8(b[447:432])
+dst[487:480] := Saturate8(b[463:448])
+dst[495:488] := Saturate8(b[479:464])
+dst[503:496] := Saturate8(b[495:480])
+dst[511:504] := Saturate8(b[511:496])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="zmm, zmm, zmm" xed="VPACKSSWB_ZMMi8_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="SI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := Saturate8(a[15:0])
+tmp_dst[15:8] := Saturate8(a[31:16])
+tmp_dst[23:16] := Saturate8(a[47:32])
+tmp_dst[31:24] := Saturate8(a[63:48])
+tmp_dst[39:32] := Saturate8(a[79:64])
+tmp_dst[47:40] := Saturate8(a[95:80])
+tmp_dst[55:48] := Saturate8(a[111:96])
+tmp_dst[63:56] := Saturate8(a[127:112])
+tmp_dst[71:64] := Saturate8(b[15:0])
+tmp_dst[79:72] := Saturate8(b[31:16])
+tmp_dst[87:80] := Saturate8(b[47:32])
+tmp_dst[95:88] := Saturate8(b[63:48])
+tmp_dst[103:96] := Saturate8(b[79:64])
+tmp_dst[111:104] := Saturate8(b[95:80])
+tmp_dst[119:112] := Saturate8(b[111:96])
+tmp_dst[127:120] := Saturate8(b[127:112])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="xmm {k}, xmm, xmm" xed="VPACKSSWB_XMMi8_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_packs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := Saturate8(a[15:0])
+tmp_dst[15:8] := Saturate8(a[31:16])
+tmp_dst[23:16] := Saturate8(a[47:32])
+tmp_dst[31:24] := Saturate8(a[63:48])
+tmp_dst[39:32] := Saturate8(a[79:64])
+tmp_dst[47:40] := Saturate8(a[95:80])
+tmp_dst[55:48] := Saturate8(a[111:96])
+tmp_dst[63:56] := Saturate8(a[127:112])
+tmp_dst[71:64] := Saturate8(b[15:0])
+tmp_dst[79:72] := Saturate8(b[31:16])
+tmp_dst[87:80] := Saturate8(b[47:32])
+tmp_dst[95:88] := Saturate8(b[63:48])
+tmp_dst[103:96] := Saturate8(b[79:64])
+tmp_dst[111:104] := Saturate8(b[95:80])
+tmp_dst[119:112] := Saturate8(b[111:96])
+tmp_dst[127:120] := Saturate8(b[127:112])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKSSWB" form="xmm {z}, xmm, xmm" xed="VPACKSSWB_XMMi8_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := SaturateU16(a[31:0])
+tmp_dst[31:16] := SaturateU16(a[63:32])
+tmp_dst[47:32] := SaturateU16(a[95:64])
+tmp_dst[63:48] := SaturateU16(a[127:96])
+tmp_dst[79:64] := SaturateU16(b[31:0])
+tmp_dst[95:80] := SaturateU16(b[63:32])
+tmp_dst[111:96] := SaturateU16(b[95:64])
+tmp_dst[127:112] := SaturateU16(b[127:96])
+tmp_dst[143:128] := SaturateU16(a[159:128])
+tmp_dst[159:144] := SaturateU16(a[191:160])
+tmp_dst[175:160] := SaturateU16(a[223:192])
+tmp_dst[191:176] := SaturateU16(a[255:224])
+tmp_dst[207:192] := SaturateU16(b[159:128])
+tmp_dst[223:208] := SaturateU16(b[191:160])
+tmp_dst[239:224] := SaturateU16(b[223:192])
+tmp_dst[255:240] := SaturateU16(b[255:224])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="ymm {k}, ymm, ymm" xed="VPACKUSDW_YMMu16_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := SaturateU16(a[31:0])
+tmp_dst[31:16] := SaturateU16(a[63:32])
+tmp_dst[47:32] := SaturateU16(a[95:64])
+tmp_dst[63:48] := SaturateU16(a[127:96])
+tmp_dst[79:64] := SaturateU16(b[31:0])
+tmp_dst[95:80] := SaturateU16(b[63:32])
+tmp_dst[111:96] := SaturateU16(b[95:64])
+tmp_dst[127:112] := SaturateU16(b[127:96])
+tmp_dst[143:128] := SaturateU16(a[159:128])
+tmp_dst[159:144] := SaturateU16(a[191:160])
+tmp_dst[175:160] := SaturateU16(a[223:192])
+tmp_dst[191:176] := SaturateU16(a[255:224])
+tmp_dst[207:192] := SaturateU16(b[159:128])
+tmp_dst[223:208] := SaturateU16(b[191:160])
+tmp_dst[239:224] := SaturateU16(b[223:192])
+tmp_dst[255:240] := SaturateU16(b[255:224])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="ymm {z}, ymm, ymm" xed="VPACKUSDW_YMMu16_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := SaturateU16(a[31:0])
+tmp_dst[31:16] := SaturateU16(a[63:32])
+tmp_dst[47:32] := SaturateU16(a[95:64])
+tmp_dst[63:48] := SaturateU16(a[127:96])
+tmp_dst[79:64] := SaturateU16(b[31:0])
+tmp_dst[95:80] := SaturateU16(b[63:32])
+tmp_dst[111:96] := SaturateU16(b[95:64])
+tmp_dst[127:112] := SaturateU16(b[127:96])
+tmp_dst[143:128] := SaturateU16(a[159:128])
+tmp_dst[159:144] := SaturateU16(a[191:160])
+tmp_dst[175:160] := SaturateU16(a[223:192])
+tmp_dst[191:176] := SaturateU16(a[255:224])
+tmp_dst[207:192] := SaturateU16(b[159:128])
+tmp_dst[223:208] := SaturateU16(b[191:160])
+tmp_dst[239:224] := SaturateU16(b[223:192])
+tmp_dst[255:240] := SaturateU16(b[255:224])
+tmp_dst[271:256] := SaturateU16(a[287:256])
+tmp_dst[287:272] := SaturateU16(a[319:288])
+tmp_dst[303:288] := SaturateU16(a[351:320])
+tmp_dst[319:304] := SaturateU16(a[383:352])
+tmp_dst[335:320] := SaturateU16(b[287:256])
+tmp_dst[351:336] := SaturateU16(b[319:288])
+tmp_dst[367:352] := SaturateU16(b[351:320])
+tmp_dst[383:368] := SaturateU16(b[383:352])
+tmp_dst[399:384] := SaturateU16(a[415:384])
+tmp_dst[415:400] := SaturateU16(a[447:416])
+tmp_dst[431:416] := SaturateU16(a[479:448])
+tmp_dst[447:432] := SaturateU16(a[511:480])
+tmp_dst[463:448] := SaturateU16(b[415:384])
+tmp_dst[479:464] := SaturateU16(b[447:416])
+tmp_dst[495:480] := SaturateU16(b[479:448])
+tmp_dst[511:496] := SaturateU16(b[511:480])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="zmm {k}, zmm, zmm" xed="VPACKUSDW_ZMMu16_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := SaturateU16(a[31:0])
+tmp_dst[31:16] := SaturateU16(a[63:32])
+tmp_dst[47:32] := SaturateU16(a[95:64])
+tmp_dst[63:48] := SaturateU16(a[127:96])
+tmp_dst[79:64] := SaturateU16(b[31:0])
+tmp_dst[95:80] := SaturateU16(b[63:32])
+tmp_dst[111:96] := SaturateU16(b[95:64])
+tmp_dst[127:112] := SaturateU16(b[127:96])
+tmp_dst[143:128] := SaturateU16(a[159:128])
+tmp_dst[159:144] := SaturateU16(a[191:160])
+tmp_dst[175:160] := SaturateU16(a[223:192])
+tmp_dst[191:176] := SaturateU16(a[255:224])
+tmp_dst[207:192] := SaturateU16(b[159:128])
+tmp_dst[223:208] := SaturateU16(b[191:160])
+tmp_dst[239:224] := SaturateU16(b[223:192])
+tmp_dst[255:240] := SaturateU16(b[255:224])
+tmp_dst[271:256] := SaturateU16(a[287:256])
+tmp_dst[287:272] := SaturateU16(a[319:288])
+tmp_dst[303:288] := SaturateU16(a[351:320])
+tmp_dst[319:304] := SaturateU16(a[383:352])
+tmp_dst[335:320] := SaturateU16(b[287:256])
+tmp_dst[351:336] := SaturateU16(b[319:288])
+tmp_dst[367:352] := SaturateU16(b[351:320])
+tmp_dst[383:368] := SaturateU16(b[383:352])
+tmp_dst[399:384] := SaturateU16(a[415:384])
+tmp_dst[415:400] := SaturateU16(a[447:416])
+tmp_dst[431:416] := SaturateU16(a[479:448])
+tmp_dst[447:432] := SaturateU16(a[511:480])
+tmp_dst[463:448] := SaturateU16(b[415:384])
+tmp_dst[479:464] := SaturateU16(b[447:416])
+tmp_dst[495:480] := SaturateU16(b[479:448])
+tmp_dst[511:496] := SaturateU16(b[511:480])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="zmm {z}, zmm, zmm" xed="VPACKUSDW_ZMMu16_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := SaturateU16(a[31:0])
+dst[31:16] := SaturateU16(a[63:32])
+dst[47:32] := SaturateU16(a[95:64])
+dst[63:48] := SaturateU16(a[127:96])
+dst[79:64] := SaturateU16(b[31:0])
+dst[95:80] := SaturateU16(b[63:32])
+dst[111:96] := SaturateU16(b[95:64])
+dst[127:112] := SaturateU16(b[127:96])
+dst[143:128] := SaturateU16(a[159:128])
+dst[159:144] := SaturateU16(a[191:160])
+dst[175:160] := SaturateU16(a[223:192])
+dst[191:176] := SaturateU16(a[255:224])
+dst[207:192] := SaturateU16(b[159:128])
+dst[223:208] := SaturateU16(b[191:160])
+dst[239:224] := SaturateU16(b[223:192])
+dst[255:240] := SaturateU16(b[255:224])
+dst[271:256] := SaturateU16(a[287:256])
+dst[287:272] := SaturateU16(a[319:288])
+dst[303:288] := SaturateU16(a[351:320])
+dst[319:304] := SaturateU16(a[383:352])
+dst[335:320] := SaturateU16(b[287:256])
+dst[351:336] := SaturateU16(b[319:288])
+dst[367:352] := SaturateU16(b[351:320])
+dst[383:368] := SaturateU16(b[383:352])
+dst[399:384] := SaturateU16(a[415:384])
+dst[415:400] := SaturateU16(a[447:416])
+dst[431:416] := SaturateU16(a[479:448])
+dst[447:432] := SaturateU16(a[511:480])
+dst[463:448] := SaturateU16(b[415:384])
+dst[479:464] := SaturateU16(b[447:416])
+dst[495:480] := SaturateU16(b[479:448])
+dst[511:496] := SaturateU16(b[511:480])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="zmm, zmm, zmm" xed="VPACKUSDW_ZMMu16_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := SaturateU16(a[31:0])
+tmp_dst[31:16] := SaturateU16(a[63:32])
+tmp_dst[47:32] := SaturateU16(a[95:64])
+tmp_dst[63:48] := SaturateU16(a[127:96])
+tmp_dst[79:64] := SaturateU16(b[31:0])
+tmp_dst[95:80] := SaturateU16(b[63:32])
+tmp_dst[111:96] := SaturateU16(b[95:64])
+tmp_dst[127:112] := SaturateU16(b[127:96])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="xmm {k}, xmm, xmm" xed="VPACKUSDW_XMMu16_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_packus_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := SaturateU16(a[31:0])
+tmp_dst[31:16] := SaturateU16(a[63:32])
+tmp_dst[47:32] := SaturateU16(a[95:64])
+tmp_dst[63:48] := SaturateU16(a[127:96])
+tmp_dst[79:64] := SaturateU16(b[31:0])
+tmp_dst[95:80] := SaturateU16(b[63:32])
+tmp_dst[111:96] := SaturateU16(b[95:64])
+tmp_dst[127:112] := SaturateU16(b[127:96])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKUSDW" form="xmm {z}, xmm, xmm" xed="VPACKUSDW_XMMu16_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := SaturateU8(a[15:0])
+tmp_dst[15:8] := SaturateU8(a[31:16])
+tmp_dst[23:16] := SaturateU8(a[47:32])
+tmp_dst[31:24] := SaturateU8(a[63:48])
+tmp_dst[39:32] := SaturateU8(a[79:64])
+tmp_dst[47:40] := SaturateU8(a[95:80])
+tmp_dst[55:48] := SaturateU8(a[111:96])
+tmp_dst[63:56] := SaturateU8(a[127:112])
+tmp_dst[71:64] := SaturateU8(b[15:0])
+tmp_dst[79:72] := SaturateU8(b[31:16])
+tmp_dst[87:80] := SaturateU8(b[47:32])
+tmp_dst[95:88] := SaturateU8(b[63:48])
+tmp_dst[103:96] := SaturateU8(b[79:64])
+tmp_dst[111:104] := SaturateU8(b[95:80])
+tmp_dst[119:112] := SaturateU8(b[111:96])
+tmp_dst[127:120] := SaturateU8(b[127:112])
+tmp_dst[135:128] := SaturateU8(a[143:128])
+tmp_dst[143:136] := SaturateU8(a[159:144])
+tmp_dst[151:144] := SaturateU8(a[175:160])
+tmp_dst[159:152] := SaturateU8(a[191:176])
+tmp_dst[167:160] := SaturateU8(a[207:192])
+tmp_dst[175:168] := SaturateU8(a[223:208])
+tmp_dst[183:176] := SaturateU8(a[239:224])
+tmp_dst[191:184] := SaturateU8(a[255:240])
+tmp_dst[199:192] := SaturateU8(b[143:128])
+tmp_dst[207:200] := SaturateU8(b[159:144])
+tmp_dst[215:208] := SaturateU8(b[175:160])
+tmp_dst[223:216] := SaturateU8(b[191:176])
+tmp_dst[231:224] := SaturateU8(b[207:192])
+tmp_dst[239:232] := SaturateU8(b[223:208])
+tmp_dst[247:240] := SaturateU8(b[239:224])
+tmp_dst[255:248] := SaturateU8(b[255:240])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="ymm {k}, ymm, ymm" xed="VPACKUSWB_YMMu8_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := SaturateU8(a[15:0])
+tmp_dst[15:8] := SaturateU8(a[31:16])
+tmp_dst[23:16] := SaturateU8(a[47:32])
+tmp_dst[31:24] := SaturateU8(a[63:48])
+tmp_dst[39:32] := SaturateU8(a[79:64])
+tmp_dst[47:40] := SaturateU8(a[95:80])
+tmp_dst[55:48] := SaturateU8(a[111:96])
+tmp_dst[63:56] := SaturateU8(a[127:112])
+tmp_dst[71:64] := SaturateU8(b[15:0])
+tmp_dst[79:72] := SaturateU8(b[31:16])
+tmp_dst[87:80] := SaturateU8(b[47:32])
+tmp_dst[95:88] := SaturateU8(b[63:48])
+tmp_dst[103:96] := SaturateU8(b[79:64])
+tmp_dst[111:104] := SaturateU8(b[95:80])
+tmp_dst[119:112] := SaturateU8(b[111:96])
+tmp_dst[127:120] := SaturateU8(b[127:112])
+tmp_dst[135:128] := SaturateU8(a[143:128])
+tmp_dst[143:136] := SaturateU8(a[159:144])
+tmp_dst[151:144] := SaturateU8(a[175:160])
+tmp_dst[159:152] := SaturateU8(a[191:176])
+tmp_dst[167:160] := SaturateU8(a[207:192])
+tmp_dst[175:168] := SaturateU8(a[223:208])
+tmp_dst[183:176] := SaturateU8(a[239:224])
+tmp_dst[191:184] := SaturateU8(a[255:240])
+tmp_dst[199:192] := SaturateU8(b[143:128])
+tmp_dst[207:200] := SaturateU8(b[159:144])
+tmp_dst[215:208] := SaturateU8(b[175:160])
+tmp_dst[223:216] := SaturateU8(b[191:176])
+tmp_dst[231:224] := SaturateU8(b[207:192])
+tmp_dst[239:232] := SaturateU8(b[223:208])
+tmp_dst[247:240] := SaturateU8(b[239:224])
+tmp_dst[255:248] := SaturateU8(b[255:240])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="ymm {z}, ymm, ymm" xed="VPACKUSWB_YMMu8_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := SaturateU8(a[15:0])
+tmp_dst[15:8] := SaturateU8(a[31:16])
+tmp_dst[23:16] := SaturateU8(a[47:32])
+tmp_dst[31:24] := SaturateU8(a[63:48])
+tmp_dst[39:32] := SaturateU8(a[79:64])
+tmp_dst[47:40] := SaturateU8(a[95:80])
+tmp_dst[55:48] := SaturateU8(a[111:96])
+tmp_dst[63:56] := SaturateU8(a[127:112])
+tmp_dst[71:64] := SaturateU8(b[15:0])
+tmp_dst[79:72] := SaturateU8(b[31:16])
+tmp_dst[87:80] := SaturateU8(b[47:32])
+tmp_dst[95:88] := SaturateU8(b[63:48])
+tmp_dst[103:96] := SaturateU8(b[79:64])
+tmp_dst[111:104] := SaturateU8(b[95:80])
+tmp_dst[119:112] := SaturateU8(b[111:96])
+tmp_dst[127:120] := SaturateU8(b[127:112])
+tmp_dst[135:128] := SaturateU8(a[143:128])
+tmp_dst[143:136] := SaturateU8(a[159:144])
+tmp_dst[151:144] := SaturateU8(a[175:160])
+tmp_dst[159:152] := SaturateU8(a[191:176])
+tmp_dst[167:160] := SaturateU8(a[207:192])
+tmp_dst[175:168] := SaturateU8(a[223:208])
+tmp_dst[183:176] := SaturateU8(a[239:224])
+tmp_dst[191:184] := SaturateU8(a[255:240])
+tmp_dst[199:192] := SaturateU8(b[143:128])
+tmp_dst[207:200] := SaturateU8(b[159:144])
+tmp_dst[215:208] := SaturateU8(b[175:160])
+tmp_dst[223:216] := SaturateU8(b[191:176])
+tmp_dst[231:224] := SaturateU8(b[207:192])
+tmp_dst[239:232] := SaturateU8(b[223:208])
+tmp_dst[247:240] := SaturateU8(b[239:224])
+tmp_dst[255:248] := SaturateU8(b[255:240])
+tmp_dst[263:256] := SaturateU8(a[271:256])
+tmp_dst[271:264] := SaturateU8(a[287:272])
+tmp_dst[279:272] := SaturateU8(a[303:288])
+tmp_dst[287:280] := SaturateU8(a[319:304])
+tmp_dst[295:288] := SaturateU8(a[335:320])
+tmp_dst[303:296] := SaturateU8(a[351:336])
+tmp_dst[311:304] := SaturateU8(a[367:352])
+tmp_dst[319:312] := SaturateU8(a[383:368])
+tmp_dst[327:320] := SaturateU8(b[271:256])
+tmp_dst[335:328] := SaturateU8(b[287:272])
+tmp_dst[343:336] := SaturateU8(b[303:288])
+tmp_dst[351:344] := SaturateU8(b[319:304])
+tmp_dst[359:352] := SaturateU8(b[335:320])
+tmp_dst[367:360] := SaturateU8(b[351:336])
+tmp_dst[375:368] := SaturateU8(b[367:352])
+tmp_dst[383:376] := SaturateU8(b[383:368])
+tmp_dst[391:384] := SaturateU8(a[399:384])
+tmp_dst[399:392] := SaturateU8(a[415:400])
+tmp_dst[407:400] := SaturateU8(a[431:416])
+tmp_dst[415:408] := SaturateU8(a[447:432])
+tmp_dst[423:416] := SaturateU8(a[463:448])
+tmp_dst[431:424] := SaturateU8(a[479:464])
+tmp_dst[439:432] := SaturateU8(a[495:480])
+tmp_dst[447:440] := SaturateU8(a[511:496])
+tmp_dst[455:448] := SaturateU8(b[399:384])
+tmp_dst[463:456] := SaturateU8(b[415:400])
+tmp_dst[471:464] := SaturateU8(b[431:416])
+tmp_dst[479:472] := SaturateU8(b[447:432])
+tmp_dst[487:480] := SaturateU8(b[463:448])
+tmp_dst[495:488] := SaturateU8(b[479:464])
+tmp_dst[503:496] := SaturateU8(b[495:480])
+tmp_dst[511:504] := SaturateU8(b[511:496])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="zmm {k}, zmm, zmm" xed="VPACKUSWB_ZMMu8_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := SaturateU8(a[15:0])
+tmp_dst[15:8] := SaturateU8(a[31:16])
+tmp_dst[23:16] := SaturateU8(a[47:32])
+tmp_dst[31:24] := SaturateU8(a[63:48])
+tmp_dst[39:32] := SaturateU8(a[79:64])
+tmp_dst[47:40] := SaturateU8(a[95:80])
+tmp_dst[55:48] := SaturateU8(a[111:96])
+tmp_dst[63:56] := SaturateU8(a[127:112])
+tmp_dst[71:64] := SaturateU8(b[15:0])
+tmp_dst[79:72] := SaturateU8(b[31:16])
+tmp_dst[87:80] := SaturateU8(b[47:32])
+tmp_dst[95:88] := SaturateU8(b[63:48])
+tmp_dst[103:96] := SaturateU8(b[79:64])
+tmp_dst[111:104] := SaturateU8(b[95:80])
+tmp_dst[119:112] := SaturateU8(b[111:96])
+tmp_dst[127:120] := SaturateU8(b[127:112])
+tmp_dst[135:128] := SaturateU8(a[143:128])
+tmp_dst[143:136] := SaturateU8(a[159:144])
+tmp_dst[151:144] := SaturateU8(a[175:160])
+tmp_dst[159:152] := SaturateU8(a[191:176])
+tmp_dst[167:160] := SaturateU8(a[207:192])
+tmp_dst[175:168] := SaturateU8(a[223:208])
+tmp_dst[183:176] := SaturateU8(a[239:224])
+tmp_dst[191:184] := SaturateU8(a[255:240])
+tmp_dst[199:192] := SaturateU8(b[143:128])
+tmp_dst[207:200] := SaturateU8(b[159:144])
+tmp_dst[215:208] := SaturateU8(b[175:160])
+tmp_dst[223:216] := SaturateU8(b[191:176])
+tmp_dst[231:224] := SaturateU8(b[207:192])
+tmp_dst[239:232] := SaturateU8(b[223:208])
+tmp_dst[247:240] := SaturateU8(b[239:224])
+tmp_dst[255:248] := SaturateU8(b[255:240])
+tmp_dst[263:256] := SaturateU8(a[271:256])
+tmp_dst[271:264] := SaturateU8(a[287:272])
+tmp_dst[279:272] := SaturateU8(a[303:288])
+tmp_dst[287:280] := SaturateU8(a[319:304])
+tmp_dst[295:288] := SaturateU8(a[335:320])
+tmp_dst[303:296] := SaturateU8(a[351:336])
+tmp_dst[311:304] := SaturateU8(a[367:352])
+tmp_dst[319:312] := SaturateU8(a[383:368])
+tmp_dst[327:320] := SaturateU8(b[271:256])
+tmp_dst[335:328] := SaturateU8(b[287:272])
+tmp_dst[343:336] := SaturateU8(b[303:288])
+tmp_dst[351:344] := SaturateU8(b[319:304])
+tmp_dst[359:352] := SaturateU8(b[335:320])
+tmp_dst[367:360] := SaturateU8(b[351:336])
+tmp_dst[375:368] := SaturateU8(b[367:352])
+tmp_dst[383:376] := SaturateU8(b[383:368])
+tmp_dst[391:384] := SaturateU8(a[399:384])
+tmp_dst[399:392] := SaturateU8(a[415:400])
+tmp_dst[407:400] := SaturateU8(a[431:416])
+tmp_dst[415:408] := SaturateU8(a[447:432])
+tmp_dst[423:416] := SaturateU8(a[463:448])
+tmp_dst[431:424] := SaturateU8(a[479:464])
+tmp_dst[439:432] := SaturateU8(a[495:480])
+tmp_dst[447:440] := SaturateU8(a[511:496])
+tmp_dst[455:448] := SaturateU8(b[399:384])
+tmp_dst[463:456] := SaturateU8(b[415:400])
+tmp_dst[471:464] := SaturateU8(b[431:416])
+tmp_dst[479:472] := SaturateU8(b[447:432])
+tmp_dst[487:480] := SaturateU8(b[463:448])
+tmp_dst[495:488] := SaturateU8(b[479:464])
+tmp_dst[503:496] := SaturateU8(b[495:480])
+tmp_dst[511:504] := SaturateU8(b[511:496])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="zmm {z}, zmm, zmm" xed="VPACKUSWB_ZMMu8_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := SaturateU8(a[15:0])
+dst[15:8] := SaturateU8(a[31:16])
+dst[23:16] := SaturateU8(a[47:32])
+dst[31:24] := SaturateU8(a[63:48])
+dst[39:32] := SaturateU8(a[79:64])
+dst[47:40] := SaturateU8(a[95:80])
+dst[55:48] := SaturateU8(a[111:96])
+dst[63:56] := SaturateU8(a[127:112])
+dst[71:64] := SaturateU8(b[15:0])
+dst[79:72] := SaturateU8(b[31:16])
+dst[87:80] := SaturateU8(b[47:32])
+dst[95:88] := SaturateU8(b[63:48])
+dst[103:96] := SaturateU8(b[79:64])
+dst[111:104] := SaturateU8(b[95:80])
+dst[119:112] := SaturateU8(b[111:96])
+dst[127:120] := SaturateU8(b[127:112])
+dst[135:128] := SaturateU8(a[143:128])
+dst[143:136] := SaturateU8(a[159:144])
+dst[151:144] := SaturateU8(a[175:160])
+dst[159:152] := SaturateU8(a[191:176])
+dst[167:160] := SaturateU8(a[207:192])
+dst[175:168] := SaturateU8(a[223:208])
+dst[183:176] := SaturateU8(a[239:224])
+dst[191:184] := SaturateU8(a[255:240])
+dst[199:192] := SaturateU8(b[143:128])
+dst[207:200] := SaturateU8(b[159:144])
+dst[215:208] := SaturateU8(b[175:160])
+dst[223:216] := SaturateU8(b[191:176])
+dst[231:224] := SaturateU8(b[207:192])
+dst[239:232] := SaturateU8(b[223:208])
+dst[247:240] := SaturateU8(b[239:224])
+dst[255:248] := SaturateU8(b[255:240])
+dst[263:256] := SaturateU8(a[271:256])
+dst[271:264] := SaturateU8(a[287:272])
+dst[279:272] := SaturateU8(a[303:288])
+dst[287:280] := SaturateU8(a[319:304])
+dst[295:288] := SaturateU8(a[335:320])
+dst[303:296] := SaturateU8(a[351:336])
+dst[311:304] := SaturateU8(a[367:352])
+dst[319:312] := SaturateU8(a[383:368])
+dst[327:320] := SaturateU8(b[271:256])
+dst[335:328] := SaturateU8(b[287:272])
+dst[343:336] := SaturateU8(b[303:288])
+dst[351:344] := SaturateU8(b[319:304])
+dst[359:352] := SaturateU8(b[335:320])
+dst[367:360] := SaturateU8(b[351:336])
+dst[375:368] := SaturateU8(b[367:352])
+dst[383:376] := SaturateU8(b[383:368])
+dst[391:384] := SaturateU8(a[399:384])
+dst[399:392] := SaturateU8(a[415:400])
+dst[407:400] := SaturateU8(a[431:416])
+dst[415:408] := SaturateU8(a[447:432])
+dst[423:416] := SaturateU8(a[463:448])
+dst[431:424] := SaturateU8(a[479:464])
+dst[439:432] := SaturateU8(a[495:480])
+dst[447:440] := SaturateU8(a[511:496])
+dst[455:448] := SaturateU8(b[399:384])
+dst[463:456] := SaturateU8(b[415:400])
+dst[471:464] := SaturateU8(b[431:416])
+dst[479:472] := SaturateU8(b[447:432])
+dst[487:480] := SaturateU8(b[463:448])
+dst[495:488] := SaturateU8(b[479:464])
+dst[503:496] := SaturateU8(b[495:480])
+dst[511:504] := SaturateU8(b[511:496])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="zmm, zmm, zmm" xed="VPACKUSWB_ZMMu8_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := SaturateU8(a[15:0])
+tmp_dst[15:8] := SaturateU8(a[31:16])
+tmp_dst[23:16] := SaturateU8(a[47:32])
+tmp_dst[31:24] := SaturateU8(a[63:48])
+tmp_dst[39:32] := SaturateU8(a[79:64])
+tmp_dst[47:40] := SaturateU8(a[95:80])
+tmp_dst[55:48] := SaturateU8(a[111:96])
+tmp_dst[63:56] := SaturateU8(a[127:112])
+tmp_dst[71:64] := SaturateU8(b[15:0])
+tmp_dst[79:72] := SaturateU8(b[31:16])
+tmp_dst[87:80] := SaturateU8(b[47:32])
+tmp_dst[95:88] := SaturateU8(b[63:48])
+tmp_dst[103:96] := SaturateU8(b[79:64])
+tmp_dst[111:104] := SaturateU8(b[95:80])
+tmp_dst[119:112] := SaturateU8(b[111:96])
+tmp_dst[127:120] := SaturateU8(b[127:112])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="xmm {k}, xmm, xmm" xed="VPACKUSWB_XMMu8_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_packus_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[7:0] := SaturateU8(a[15:0])
+tmp_dst[15:8] := SaturateU8(a[31:16])
+tmp_dst[23:16] := SaturateU8(a[47:32])
+tmp_dst[31:24] := SaturateU8(a[63:48])
+tmp_dst[39:32] := SaturateU8(a[79:64])
+tmp_dst[47:40] := SaturateU8(a[95:80])
+tmp_dst[55:48] := SaturateU8(a[111:96])
+tmp_dst[63:56] := SaturateU8(a[127:112])
+tmp_dst[71:64] := SaturateU8(b[15:0])
+tmp_dst[79:72] := SaturateU8(b[31:16])
+tmp_dst[87:80] := SaturateU8(b[47:32])
+tmp_dst[95:88] := SaturateU8(b[63:48])
+tmp_dst[103:96] := SaturateU8(b[79:64])
+tmp_dst[111:104] := SaturateU8(b[95:80])
+tmp_dst[119:112] := SaturateU8(b[111:96])
+tmp_dst[127:120] := SaturateU8(b[127:112])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPACKUSWB" form="xmm {z}, xmm, xmm" xed="VPACKUSWB_XMMu8_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] + b[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDB" form="ymm {k}, ymm, ymm" xed="VPADDB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] + b[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDB" form="ymm {z}, ymm, ymm" xed="VPADDB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := a[i+7:i] + b[i+7:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDB" form="zmm, zmm, zmm" xed="VPADDB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] + b[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDB" form="zmm {k}, zmm, zmm" xed="VPADDB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] + b[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDB" form="zmm {z}, zmm, zmm" xed="VPADDB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] + b[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDB" form="xmm {k}, xmm, xmm" xed="VPADDB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] + b[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDB" form="xmm {z}, xmm, xmm" xed="VPADDB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDSB" form="ymm {k}, ymm, ymm" xed="VPADDSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDSB" form="ymm {z}, ymm, ymm" xed="VPADDSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSB" form="zmm, zmm, zmm" xed="VPADDSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSB" form="zmm {k}, zmm, zmm" xed="VPADDSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSB" form="zmm {z}, zmm, zmm" xed="VPADDSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDSB" form="xmm {k}, xmm, xmm" xed="VPADDSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_adds_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDSB" form="xmm {z}, xmm, xmm" xed="VPADDSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDSW" form="ymm {k}, ymm, ymm" xed="VPADDSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDSW" form="ymm {z}, ymm, ymm" xed="VPADDSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSW" form="zmm, zmm, zmm" xed="VPADDSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSW" form="zmm {k}, zmm, zmm" xed="VPADDSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSW" form="zmm {z}, zmm, zmm" xed="VPADDSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDSW" form="xmm {k}, xmm, xmm" xed="VPADDSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_adds_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDSW" form="xmm {z}, xmm, xmm" xed="VPADDSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="ymm {k}, ymm, ymm" xed="VPADDUSB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="ymm {z}, ymm, ymm" xed="VPADDUSB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="zmm, zmm, zmm" xed="VPADDUSB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="zmm {k}, zmm, zmm" xed="VPADDUSB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="zmm {z}, zmm, zmm" xed="VPADDUSB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="xmm {k}, xmm, xmm" xed="VPADDUSB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_adds_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDUSB" form="xmm {z}, xmm, xmm" xed="VPADDUSB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="ymm {k}, ymm, ymm" xed="VPADDUSW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="ymm {z}, ymm, ymm" xed="VPADDUSW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="zmm, zmm, zmm" xed="VPADDUSW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="zmm {k}, zmm, zmm" xed="VPADDUSW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="zmm {z}, zmm, zmm" xed="VPADDUSW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="xmm {k}, xmm, xmm" xed="VPADDUSW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_adds_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDUSW" form="xmm {z}, xmm, xmm" xed="VPADDUSW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] + b[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDW" form="ymm {k}, ymm, ymm" xed="VPADDW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] + b[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDW" form="ymm {z}, ymm, ymm" xed="VPADDW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := a[i+15:i] + b[i+15:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDW" form="zmm, zmm, zmm" xed="VPADDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] + b[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDW" form="zmm {k}, zmm, zmm" xed="VPADDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] + b[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDW" form="zmm {z}, zmm, zmm" xed="VPADDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] + b[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDW" form="xmm {k}, xmm, xmm" xed="VPADDW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] + b[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDW" form="xmm {z}, xmm, xmm" xed="VPADDW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*128
+	tmp[255:0] := ((a[i+127:i] &lt;&lt; 128)[255:0] OR b[i+127:i]) &gt;&gt; (imm8*8)
+	tmp_dst[i+127:i] := tmp[127:0]
+ENDFOR
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="ymm {k}, ymm, ymm, imm8" xed="VPALIGNR_YMMu8_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*128
+	tmp[255:0] := ((a[i+127:i] &lt;&lt; 128)[255:0] OR b[i+127:i]) &gt;&gt; (imm8*8)
+	tmp_dst[i+127:i] := tmp[127:0]
+ENDFOR
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="ymm {z}, ymm, ymm, imm8" xed="VPALIGNR_YMMu8_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*128
+	tmp[255:0] := ((a[i+127:i] &lt;&lt; 128)[255:0] OR b[i+127:i]) &gt;&gt; (imm8*8)
+	dst[i+127:i] := tmp[127:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="zmm, zmm, zmm, imm8" xed="VPALIGNR_ZMMu8_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*128
+	tmp[255:0] := ((a[i+127:i] &lt;&lt; 128)[255:0] OR b[i+127:i]) &gt;&gt; (imm8*8)
+	tmp_dst[i+127:i] := tmp[127:0]
+ENDFOR
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="zmm {k}, zmm, zmm, imm8" xed="VPALIGNR_ZMMu8_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*128
+	tmp[255:0] := ((a[i+127:i] &lt;&lt; 128)[255:0] OR b[i+127:i]) &gt;&gt; (imm8*8)
+	tmp_dst[i+127:i] := tmp[127:0]
+ENDFOR
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="zmm {z}, zmm, zmm, imm8" xed="VPALIGNR_ZMMu8_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[255:0] := ((a[127:0] &lt;&lt; 128)[255:0] OR b[127:0]) &gt;&gt; (imm8*8)
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="xmm {k}, xmm, xmm, imm8" xed="VPALIGNR_XMMu8_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate pairs of 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[255:0] := ((a[127:0] &lt;&lt; 128)[255:0] OR b[127:0]) &gt;&gt; (imm8*8)
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPALIGNR" form="xmm {z}, xmm, xmm, imm8" xed="VPALIGNR_XMMu8_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAVGB" form="ymm {k}, ymm, ymm" xed="VPAVGB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAVGB" form="ymm {z}, ymm, ymm" xed="VPAVGB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPAVGB" form="zmm, zmm, zmm" xed="VPAVGB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPAVGB" form="zmm {k}, zmm, zmm" xed="VPAVGB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPAVGB" form="zmm {z}, zmm, zmm" xed="VPAVGB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPAVGB" form="xmm {k}, xmm, xmm" xed="VPAVGB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_avg_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPAVGB" form="xmm {z}, xmm, xmm" xed="VPAVGB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAVGW" form="ymm {k}, ymm, ymm" xed="VPAVGW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPAVGW" form="ymm {z}, ymm, ymm" xed="VPAVGW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPAVGW" form="zmm, zmm, zmm" xed="VPAVGW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPAVGW" form="zmm {k}, zmm, zmm" xed="VPAVGW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPAVGW" form="zmm {z}, zmm, zmm" xed="VPAVGW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPAVGW" form="xmm {k}, xmm, xmm" xed="VPAVGW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_avg_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPAVGW" form="xmm {z}, xmm, xmm" xed="VPAVGW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_blend_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := b[i+7:i]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDMB" form="ymm {k}, ymm, ymm" xed="VPBLENDMB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_blend_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := b[i+7:i]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBLENDMB" form="zmm {k}, zmm, zmm" xed="VPBLENDMB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_blend_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Blend packed 8-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := b[i+7:i]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBLENDMB" form="xmm {k}, xmm, xmm" xed="VPBLENDMB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_blend_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := b[i+15:i]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDMW" form="ymm {k}, ymm, ymm" xed="VPBLENDMW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_blend_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := b[i+15:i]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBLENDMW" form="zmm {k}, zmm, zmm" xed="VPBLENDMW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_blend_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Blend packed 16-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := b[i+15:i]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBLENDMW" form="xmm {k}, xmm, xmm" xed="VPBLENDMW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="ymm {k}, xmm" xed="VPBROADCASTB_YMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="ymm {k}, r8" xed="VPBROADCASTB_YMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="ymm {z}, xmm" xed="VPBROADCASTB_YMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="ymm {z}, r8" xed="VPBROADCASTB_YMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="zmm, xmm" xed="VPBROADCASTB_ZMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="zmm {k}, xmm" xed="VPBROADCASTB_ZMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="zmm {k}, r8" xed="VPBROADCASTB_ZMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="zmm {z}, xmm" xed="VPBROADCASTB_ZMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="zmm {z}, r8" xed="VPBROADCASTB_ZMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="xmm {k}, xmm" xed="VPBROADCASTB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="xmm {k}, r8" xed="VPBROADCASTB_XMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_broadcastb_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Broadcast the low packed 8-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="xmm {z}, xmm" xed="VPBROADCASTB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[7:0]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="xmm {z}, r8" xed="VPBROADCASTB_XMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="ymm {k}, xmm" xed="VPBROADCASTW_YMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="ymm {k}, r16" xed="VPBROADCASTW_YMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="ymm {z}, xmm" xed="VPBROADCASTW_YMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="ymm {z}, r16" xed="VPBROADCASTW_YMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="zmm, xmm" xed="VPBROADCASTW_ZMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="zmm {k}, xmm" xed="VPBROADCASTW_ZMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="zmm {k}, r16" xed="VPBROADCASTW_ZMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="zmm {z}, xmm" xed="VPBROADCASTW_ZMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="zmm {z}, r16" xed="VPBROADCASTW_ZMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="xmm {k}, xmm" xed="VPBROADCASTW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="xmm {k}, r16" xed="VPBROADCASTW_XMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_broadcastw_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="xmm {z}, xmm" xed="VPBROADCASTW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="xmm {z}, r16" xed="VPBROADCASTW_XMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm, imm8" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm, imm8" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, ymm, ymm" xed="VPCMPB_MASKmskw_MASKmskw_YMMi8_YMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmp_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm, imm8" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpeq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpge_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpgt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmple_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpneq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmp_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm, imm8" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpeq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpge_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpgt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmple_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpneq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, zmm, zmm" xed="VPCMPB_MASKmskw_MASKmskw_ZMMi8_ZMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm, imm8" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm, imm8" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPB" form="k {k}, xmm, xmm" xed="VPCMPB_MASKmskw_MASKmskw_XMMi8_XMMi8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm, imm8" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm, imm8" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, ymm, ymm" xed="VPCMPUB_MASKmskw_MASKmskw_YMMu8_YMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmp_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm, imm8" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpeq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpge_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpgt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmple_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpneq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmp_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm, imm8" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpeq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpge_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpgt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmple_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpneq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, zmm, zmm" xed="VPCMPUB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm, imm8" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] OP b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm, imm8" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] == b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &gt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt;= b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] &lt; b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epu8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ( a[i+7:i] != b[i+7:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUB" form="k {k}, xmm, xmm" xed="VPCMPUB_MASKmskw_MASKmskw_XMMu8_XMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm, imm8" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm, imm8" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, ymm, ymm" xed="VPCMPUW_MASKmskw_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmp_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm, imm8" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpeq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpge_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpgt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmple_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpneq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmp_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm, imm8" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpeq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpge_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpgt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmple_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpneq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, zmm, zmm" xed="VPCMPUW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm, imm8" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm, imm8" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epu16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUW" form="k {k}, xmm, xmm" xed="VPCMPUW_MASKmskw_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm, imm8" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm, imm8" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, ymm, ymm" xed="VPCMPW_MASKmskw_MASKmskw_YMMi16_YMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmp_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm, imm8" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpeq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpge_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpgt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmple_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpneq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmp_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm, imm8" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpeq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpge_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpgt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmple_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpneq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, zmm, zmm" xed="VPCMPW_MASKmskw_MASKmskw_ZMMi16_ZMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm, imm8" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] OP b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm, imm8" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] == b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &gt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt;= b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] &lt; b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ( a[i+15:i] != b[i+15:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPW" form="k {k}, xmm, xmm" xed="VPCMPW_MASKmskw_MASKmskw_XMMi16_XMMi16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask2_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+3:i]
+		dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := idx[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="ymm {k}, ymm, ymm" xed="VPERMI2W_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+3:i]
+		dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMT2W" form="ymm {k}, ymm, ymm" xed="VPERMT2W_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+3:i]
+		dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="ymm {z}, ymm, ymm" xed="VPERMI2W_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<instruction name="VPERMT2W" form="ymm {z}, ymm, ymm" xed="VPERMT2W_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	off := 16*idx[i+3:i]
+	dst[i+15:i] := idx[i+4] ? b[off+15:off] : a[off+15:off]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="ymm, ymm, ymm" xed="VPERMI2W_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<instruction name="VPERMT2W" form="ymm, ymm, ymm" xed="VPERMT2W_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+4:i]
+		dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := idx[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="zmm {k}, zmm, zmm" xed="VPERMI2W_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+4:i]
+		dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMT2W" form="zmm {k}, zmm, zmm" xed="VPERMT2W_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+4:i]
+		dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="zmm {z}, zmm, zmm" xed="VPERMI2W_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<instruction name="VPERMT2W" form="zmm {z}, zmm, zmm" xed="VPERMT2W_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	off := 16*idx[i+4:i]
+	dst[i+15:i] := idx[i+5] ? b[off+15:off] : a[off+15:off]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="zmm, zmm, zmm" xed="VPERMI2W_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<instruction name="VPERMT2W" form="zmm, zmm, zmm" xed="VPERMT2W_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask2_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+2:i]
+		dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := idx[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="xmm {k}, xmm, xmm" xed="VPERMI2W_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+2:i]
+		dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMT2W" form="xmm {k}, xmm, xmm" xed="VPERMT2W_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		off := 16*idx[i+2:i]
+		dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="xmm {z}, xmm, xmm" xed="VPERMI2W_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<instruction name="VPERMT2W" form="xmm {z}, xmm, xmm" xed="VPERMT2W_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutex2var_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	off := 16*idx[i+2:i]
+	dst[i+15:i] := idx[i+3] ? b[off+15:off] : a[off+15:off]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2W" form="xmm, xmm, xmm" xed="VPERMI2W_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<instruction name="VPERMT2W" form="xmm, xmm, xmm" xed="VPERMT2W_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	id := idx[i+3:i]*16
+	IF k[j]
+		dst[i+15:i] := a[id+15:id]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMW" form="ymm {k}, ymm, ymm" xed="VPERMW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	id := idx[i+3:i]*16
+	IF k[j]
+		dst[i+15:i] := a[id+15:id]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMW" form="ymm {z}, ymm, ymm" xed="VPERMW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="idx" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	id := idx[i+3:i]*16
+	dst[i+15:i] := a[id+15:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMW" form="ymm, ymm, ymm" xed="VPERMW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	id := idx[i+4:i]*16
+	IF k[j]
+		dst[i+15:i] := a[id+15:id]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMW" form="zmm {k}, zmm, zmm" xed="VPERMW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	id := idx[i+4:i]*16
+	IF k[j]
+		dst[i+15:i] := a[id+15:id]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMW" form="zmm {z}, zmm, zmm" xed="VPERMW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="idx" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	id := idx[i+4:i]*16
+	dst[i+15:i] := a[id+15:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMW" form="zmm, zmm, zmm" xed="VPERMW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	id := idx[i+2:i]*16
+	IF k[j]
+		dst[i+15:i] := a[id+15:id]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMW" form="xmm {k}, xmm, xmm" xed="VPERMW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	id := idx[i+2:i]*16
+	IF k[j]
+		dst[i+15:i] := a[id+15:id]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMW" form="xmm {z}, xmm, xmm" xed="VPERMW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutexvar_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="idx" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Shuffle 16-bit integers in "a" using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	id := idx[i+2:i]*16
+	dst[i+15:i] := a[id+15:id]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMW" form="xmm, xmm, xmm" xed="VPERMW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="src" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="ymm {k}, ymm, ymm" xed="VPMADDUBSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="ymm {z}, ymm, ymm" xed="VPMADDUBSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="zmm, zmm, zmm" xed="VPMADDUBSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="src" etype="SI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="zmm {k}, zmm, zmm" xed="VPMADDUBSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="zmm {z}, zmm, zmm" xed="VPMADDUBSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="xmm {k}, xmm, xmm" xed="VPMADDUBSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply packed unsigned 8-bit integers in "a" by packed signed 8-bit integers in "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADDUBSW" form="xmm {z}, xmm, xmm" xed="VPMADDUBSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="ymm {k}, ymm, ymm" xed="VPMADDWD_YMMi32_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="ymm {z}, ymm, ymm" xed="VPMADDWD_YMMi32_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="zmm, zmm, zmm" xed="VPMADDWD_ZMMi32_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="zmm {k}, zmm, zmm" xed="VPMADDWD_ZMMi32_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="zmm {z}, zmm, zmm" xed="VPMADDWD_ZMMi32_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="xmm {k}, xmm, xmm" xed="VPMADDWD_XMMi32_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_madd_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADDWD" form="xmm {z}, xmm, xmm" xed="VPMADDWD_XMMi32_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="ymm {k}, ymm, ymm" xed="VPMAXSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="ymm {z}, ymm, ymm" xed="VPMAXSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="zmm {k}, zmm, zmm" xed="VPMAXSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="zmm {z}, zmm, zmm" xed="VPMAXSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="zmm, zmm, zmm" xed="VPMAXSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="xmm {k}, xmm, xmm" xed="VPMAXSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSB" form="xmm {z}, xmm, xmm" xed="VPMAXSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="ymm {k}, ymm, ymm" xed="VPMAXSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="ymm {z}, ymm, ymm" xed="VPMAXSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="zmm {k}, zmm, zmm" xed="VPMAXSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="zmm {z}, zmm, zmm" xed="VPMAXSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="zmm, zmm, zmm" xed="VPMAXSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="xmm {k}, xmm, xmm" xed="VPMAXSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSW" form="xmm {z}, xmm, xmm" xed="VPMAXSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="ymm {k}, ymm, ymm" xed="VPMAXUB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="ymm {z}, ymm, ymm" xed="VPMAXUB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="zmm {k}, zmm, zmm" xed="VPMAXUB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="zmm {z}, zmm, zmm" xed="VPMAXUB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="zmm, zmm, zmm" xed="VPMAXUB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="xmm {k}, xmm, xmm" xed="VPMAXUB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUB" form="xmm {z}, xmm, xmm" xed="VPMAXUB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="ymm {k}, ymm, ymm" xed="VPMAXUW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="ymm {z}, ymm, ymm" xed="VPMAXUW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="zmm {k}, zmm, zmm" xed="VPMAXUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="zmm {z}, zmm, zmm" xed="VPMAXUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="zmm, zmm, zmm" xed="VPMAXUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="xmm {k}, xmm, xmm" xed="VPMAXUW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUW" form="xmm {z}, xmm, xmm" xed="VPMAXUW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSB" form="ymm {k}, ymm, ymm" xed="VPMINSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSB" form="ymm {z}, ymm, ymm" xed="VPMINSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSB" form="zmm {k}, zmm, zmm" xed="VPMINSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSB" form="zmm {z}, zmm, zmm" xed="VPMINSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSB" form="zmm, zmm, zmm" xed="VPMINSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSB" form="xmm {k}, xmm, xmm" xed="VPMINSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSB" form="xmm {z}, xmm, xmm" xed="VPMINSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSW" form="ymm {k}, ymm, ymm" xed="VPMINSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSW" form="ymm {z}, ymm, ymm" xed="VPMINSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSW" form="zmm {k}, zmm, zmm" xed="VPMINSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSW" form="zmm {z}, zmm, zmm" xed="VPMINSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSW" form="zmm, zmm, zmm" xed="VPMINSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSW" form="xmm {k}, xmm, xmm" xed="VPMINSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSW" form="xmm {z}, xmm, xmm" xed="VPMINSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUB" form="ymm {k}, ymm, ymm" xed="VPMINUB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUB" form="ymm {z}, ymm, ymm" xed="VPMINUB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUB" form="zmm {k}, zmm, zmm" xed="VPMINUB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUB" form="zmm {z}, zmm, zmm" xed="VPMINUB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUB" form="zmm, zmm, zmm" xed="VPMINUB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUB" form="xmm {k}, xmm, xmm" xed="VPMINUB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUB" form="xmm {z}, xmm, xmm" xed="VPMINUB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUW" form="ymm {k}, ymm, ymm" xed="VPMINUW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUW" form="ymm {z}, ymm, ymm" xed="VPMINUW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUW" form="zmm {k}, zmm, zmm" xed="VPMINUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUW" form="zmm {z}, zmm, zmm" xed="VPMINUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUW" form="zmm, zmm, zmm" xed="VPMINUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUW" form="xmm {k}, xmm, xmm" xed="VPMINUW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUW" form="xmm {z}, xmm, xmm" xed="VPMINUW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movepi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF a[i+7]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVB2M" form="k, ymm" xed="VPMOVB2M_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movepi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF a[i+7]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVB2M" form="k, zmm" xed="VPMOVB2M_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movepi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 8-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF a[i+7]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVB2M" form="k, xmm" xed="VPMOVB2M_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movm_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<description>Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := 0xFF
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVM2B" form="ymm, k" xed="VPMOVM2B_YMMu8_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movm_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<description>Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := 0xFF
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVM2B" form="zmm, k" xed="VPMOVM2B_ZMMu8_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movm_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Set each packed 8-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := 0xFF
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVM2B" form="xmm, k" xed="VPMOVM2B_XMMu8_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movm_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := 0xFFFF
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVM2W" form="ymm, k" xed="VPMOVM2W_YMMu16_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movm_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<description>Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := 0xFFFF
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVM2W" form="zmm, k" xed="VPMOVM2W_ZMMu16_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movm_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Set each packed 16-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := 0xFFFF
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVM2W" form="xmm, k" xed="VPMOVM2W_XMMu16_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := Saturate8(a[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="xmm, ymm" xed="VPMOVSWB_XMMi8_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="SI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="xmm {k}, ymm" xed="VPMOVSWB_XMMi8_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSWB" form="m128 {k}, ymm" xed="VPMOVSWB_MEMi8_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="xmm {z}, ymm" xed="VPMOVSWB_XMMi8_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI8"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := Saturate8(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="ymm, zmm" xed="VPMOVSWB_YMMi8_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI8"/>
+	<parameter type="__m256i" varname="src" etype="SI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="ymm {k}, zmm" xed="VPMOVSWB_YMMi8_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="256"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSWB" form="m256 {k}, zmm" xed="VPMOVSWB_MEMi8_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="ymm {z}, zmm" xed="VPMOVSWB_YMMi8_MASKmskw_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := Saturate8(a[i+15:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="xmm, xmm" xed="VPMOVSWB_XMMi8_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="xmm {k}, xmm" xed="VPMOVSWB_XMMi8_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSWB" form="m64 {k}, xmm" xed="VPMOVSWB_MEMi8_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSWB" form="xmm {z}, xmm" xed="VPMOVSWB_XMMi8_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="src" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := SignExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="ymm {k}, xmm" xed="VPMOVSXBW_YMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := SignExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="ymm {z}, xmm" xed="VPMOVSXBW_YMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	l := j*16
+	dst[l+15:l] := SignExtend16(a[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="zmm, ymm" xed="VPMOVSXBW_ZMMi16_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="src" etype="SI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := SignExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="zmm {k}, ymm" xed="VPMOVSXBW_ZMMi16_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := SignExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="zmm {z}, ymm" xed="VPMOVSXBW_ZMMi16_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := SignExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="xmm {k}, xmm" xed="VPMOVSXBW_XMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := SignExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXBW" form="xmm {z}, xmm" xed="VPMOVSXBW_XMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := SaturateU8(a[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="xmm, ymm" xed="VPMOVUSWB_XMMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="xmm {k}, ymm" xed="VPMOVUSWB_XMMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSWB" form="m128 {k}, ymm" xed="VPMOVUSWB_MEMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="xmm {z}, ymm" xed="VPMOVUSWB_XMMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := SaturateU8(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="ymm, zmm" xed="VPMOVUSWB_YMMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="ymm {k}, zmm" xed="VPMOVUSWB_YMMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="256"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSWB" form="m256 {k}, zmm" xed="VPMOVUSWB_MEMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="ymm {z}, zmm" xed="VPMOVUSWB_YMMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := SaturateU8(a[i+15:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="xmm, xmm" xed="VPMOVUSWB_XMMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="xmm {k}, xmm" xed="VPMOVUSWB_XMMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSWB" form="m64 {k}, xmm" xed="VPMOVUSWB_MEMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtusepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSWB" form="xmm {z}, xmm" xed="VPMOVUSWB_XMMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movepi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF a[i+15]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVW2M" form="k, ymm" xed="VPMOVW2M_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movepi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF a[i+15]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVW2M" form="k, zmm" xed="VPMOVW2M_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movepi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 16-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF a[i+15]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPMOVW2M" form="k, xmm" xed="VPMOVW2M_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := Truncate8(a[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="xmm, ymm" xed="VPMOVWB_XMMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="xmm {k}, ymm" xed="VPMOVWB_XMMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVWB" form="m128 {k}, ymm" xed="VPMOVWB_MEMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="xmm {z}, ymm" xed="VPMOVWB_XMMu8_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := Truncate8(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="ymm, zmm" xed="VPMOVWB_YMMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="ymm {k}, zmm" xed="VPMOVWB_YMMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="256"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVWB" form="m256 {k}, zmm" xed="VPMOVWB_MEMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="ymm {z}, zmm" xed="VPMOVWB_YMMu8_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	dst[l+7:l] := Truncate8(a[i+15:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="xmm, xmm" xed="VPMOVWB_XMMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="xmm {k}, xmm" xed="VPMOVWB_XMMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi16_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVWB" form="m64 {k}, xmm" xed="VPMOVWB_MEMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi16_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+15:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVWB" form="xmm {z}, xmm" xed="VPMOVWB_XMMu8_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := ZeroExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="ymm {k}, xmm" xed="VPMOVZXBW_YMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := ZeroExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="ymm {z}, xmm" xed="VPMOVZXBW_YMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	l := j*16
+	dst[l+15:l] := ZeroExtend16(a[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="zmm, ymm" xed="VPMOVZXBW_ZMMi16_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := ZeroExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="zmm {k}, ymm" xed="VPMOVZXBW_ZMMi16_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := ZeroExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="zmm {z}, ymm" xed="VPMOVZXBW_ZMMi16_MASKmskw_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := ZeroExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="xmm {k}, xmm" xed="VPMOVZXBW_XMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	l := j*16
+	IF k[j]
+		dst[l+15:l] := ZeroExtend16(a[i+7:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXBW" form="xmm {z}, xmm" xed="VPMOVZXBW_XMMi16_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+		dst[i+15:i] := tmp[16:1]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="ymm {k}, ymm, ymm" xed="VPMULHRSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+		dst[i+15:i] := tmp[16:1]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="ymm {z}, ymm, ymm" xed="VPMULHRSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+		dst[i+15:i] := tmp[16:1]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="zmm {k}, zmm, zmm" xed="VPMULHRSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+		dst[i+15:i] := tmp[16:1]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="zmm {z}, zmm, zmm" xed="VPMULHRSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+	dst[i+15:i] := tmp[16:1]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="zmm, zmm, zmm" xed="VPMULHRSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+		dst[i+15:i] := tmp[16:1]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="xmm {k}, xmm, xmm" xed="VPMULHRSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+		dst[i+15:i] := tmp[16:1]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULHRSW" form="xmm {z}, xmm, xmm" xed="VPMULHRSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := a[i+15:i] * b[i+15:i]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="ymm {k}, ymm, ymm" xed="VPMULHUW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := a[i+15:i] * b[i+15:i]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="ymm {z}, ymm, ymm" xed="VPMULHUW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := a[i+15:i] * b[i+15:i]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="zmm {k}, zmm, zmm" xed="VPMULHUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := a[i+15:i] * b[i+15:i]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="zmm {z}, zmm, zmm" xed="VPMULHUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="zmm, zmm, zmm" xed="VPMULHUW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := a[i+15:i] * b[i+15:i]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="xmm {k}, xmm, xmm" xed="VPMULHUW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := a[i+15:i] * b[i+15:i]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULHUW" form="xmm {z}, xmm, xmm" xed="VPMULHUW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHW" form="ymm {k}, ymm, ymm" xed="VPMULHW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULHW" form="ymm {z}, ymm, ymm" xed="VPMULHW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHW" form="zmm {k}, zmm, zmm" xed="VPMULHW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHW" form="zmm {z}, zmm, zmm" xed="VPMULHW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHW" form="zmm, zmm, zmm" xed="VPMULHW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULHW" form="xmm {k}, xmm, xmm" xed="VPMULHW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULHW" form="xmm {z}, xmm, xmm" xed="VPMULHW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLW" form="ymm {k}, ymm, ymm" xed="VPMULLW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLW" form="ymm {z}, ymm, ymm" xed="VPMULLW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLW" form="zmm {k}, zmm, zmm" xed="VPMULLW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLW" form="zmm {z}, zmm, zmm" xed="VPMULLW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[15:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLW" form="zmm, zmm, zmm" xed="VPMULLW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[15:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLW" form="xmm {k}, xmm, xmm" xed="VPMULLW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+		dst[i+15:i] := tmp[15:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLW" form="xmm {z}, xmm, xmm" xed="VPMULLW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sad_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
+ENDFOR
+FOR j := 0 to 7
+	i := j*64
+	dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \
+	               tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56]
+	dst[i+63:i+16] := 0
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSADBW" form="zmm, zmm, zmm" xed="VPSADBW_ZMMu16_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		IF b[i+7] == 1
+			dst[i+7:i] := 0
+		ELSE
+			index[4:0] := b[i+3:i] + (j &amp; 0x10)
+			dst[i+7:i] := a[index*8+7:index*8]
+		FI
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="ymm {k}, ymm, ymm" xed="VPSHUFB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		IF b[i+7] == 1
+			dst[i+7:i] := 0
+		ELSE
+			index[4:0] := b[i+3:i] + (j &amp; 0x10)
+			dst[i+7:i] := a[index*8+7:index*8]
+		FI
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="ymm {z}, ymm, ymm" xed="VPSHUFB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" within 128-bit lanes using the control in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		IF b[i+7] == 1
+			dst[i+7:i] := 0
+		ELSE
+			index[5:0] := b[i+3:i] + (j &amp; 0x30)
+			dst[i+7:i] := a[index*8+7:index*8]
+		FI
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="zmm {k}, zmm, zmm" xed="VPSHUFB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		IF b[i+7] == 1
+			dst[i+7:i] := 0
+		ELSE
+			index[5:0] := b[i+3:i] + (j &amp; 0x30)
+			dst[i+7:i] := a[index*8+7:index*8]
+		FI
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="zmm {z}, zmm, zmm" xed="VPSHUFB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF b[i+7] == 1
+		dst[i+7:i] := 0
+	ELSE
+		index[5:0] := b[i+3:i] + (j &amp; 0x30)
+		dst[i+7:i] := a[index*8+7:index*8]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="zmm, zmm, zmm" xed="VPSHUFB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		IF b[i+7] == 1
+			dst[i+7:i] := 0
+		ELSE
+			index[3:0] := b[i+3:i]
+			dst[i+7:i] := a[index*8+7:index*8]
+		FI
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="xmm {k}, xmm, xmm" xed="VPSHUFB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		IF b[i+7] == 1
+			dst[i+7:i] := 0
+		ELSE
+			index[3:0] := b[i+3:i]
+			dst[i+7:i] := a[index*8+7:index*8]
+		FI
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFB" form="xmm {z}, xmm, xmm" xed="VPSHUFB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := a[63:0]
+tmp_dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+tmp_dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+tmp_dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+tmp_dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+tmp_dst[191:128] := a[191:128]
+tmp_dst[207:192] := (a &gt;&gt; (imm8[1:0] * 16))[207:192]
+tmp_dst[223:208] := (a &gt;&gt; (imm8[3:2] * 16))[207:192]
+tmp_dst[239:224] := (a &gt;&gt; (imm8[5:4] * 16))[207:192]
+tmp_dst[255:240] := (a &gt;&gt; (imm8[7:6] * 16))[207:192]
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="ymm {k}, ymm, imm8" xed="VPSHUFHW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := a[63:0]
+tmp_dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+tmp_dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+tmp_dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+tmp_dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+tmp_dst[191:128] := a[191:128]
+tmp_dst[207:192] := (a &gt;&gt; (imm8[1:0] * 16))[207:192]
+tmp_dst[223:208] := (a &gt;&gt; (imm8[3:2] * 16))[207:192]
+tmp_dst[239:224] := (a &gt;&gt; (imm8[5:4] * 16))[207:192]
+tmp_dst[255:240] := (a &gt;&gt; (imm8[7:6] * 16))[207:192]
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="ymm {z}, ymm, imm8" xed="VPSHUFHW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := a[63:0]
+tmp_dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+tmp_dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+tmp_dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+tmp_dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+tmp_dst[191:128] := a[191:128]
+tmp_dst[207:192] := (a &gt;&gt; (imm8[1:0] * 16))[207:192]
+tmp_dst[223:208] := (a &gt;&gt; (imm8[3:2] * 16))[207:192]
+tmp_dst[239:224] := (a &gt;&gt; (imm8[5:4] * 16))[207:192]
+tmp_dst[255:240] := (a &gt;&gt; (imm8[7:6] * 16))[207:192]
+tmp_dst[319:256] := a[319:256]
+tmp_dst[335:320] := (a &gt;&gt; (imm8[1:0] * 16))[335:320]
+tmp_dst[351:336] := (a &gt;&gt; (imm8[3:2] * 16))[335:320]
+tmp_dst[367:352] := (a &gt;&gt; (imm8[5:4] * 16))[335:320]
+tmp_dst[383:368] := (a &gt;&gt; (imm8[7:6] * 16))[335:320]
+tmp_dst[447:384] := a[447:384]
+tmp_dst[463:448] := (a &gt;&gt; (imm8[1:0] * 16))[463:448]
+tmp_dst[479:464] := (a &gt;&gt; (imm8[3:2] * 16))[463:448]
+tmp_dst[495:480] := (a &gt;&gt; (imm8[5:4] * 16))[463:448]
+tmp_dst[511:496] := (a &gt;&gt; (imm8[7:6] * 16))[463:448]
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="zmm {k}, zmm, imm8" xed="VPSHUFHW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := a[63:0]
+tmp_dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+tmp_dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+tmp_dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+tmp_dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+tmp_dst[191:128] := a[191:128]
+tmp_dst[207:192] := (a &gt;&gt; (imm8[1:0] * 16))[207:192]
+tmp_dst[223:208] := (a &gt;&gt; (imm8[3:2] * 16))[207:192]
+tmp_dst[239:224] := (a &gt;&gt; (imm8[5:4] * 16))[207:192]
+tmp_dst[255:240] := (a &gt;&gt; (imm8[7:6] * 16))[207:192]
+tmp_dst[319:256] := a[319:256]
+tmp_dst[335:320] := (a &gt;&gt; (imm8[1:0] * 16))[335:320]
+tmp_dst[351:336] := (a &gt;&gt; (imm8[3:2] * 16))[335:320]
+tmp_dst[367:352] := (a &gt;&gt; (imm8[5:4] * 16))[335:320]
+tmp_dst[383:368] := (a &gt;&gt; (imm8[7:6] * 16))[335:320]
+tmp_dst[447:384] := a[447:384]
+tmp_dst[463:448] := (a &gt;&gt; (imm8[1:0] * 16))[463:448]
+tmp_dst[479:464] := (a &gt;&gt; (imm8[3:2] * 16))[463:448]
+tmp_dst[495:480] := (a &gt;&gt; (imm8[5:4] * 16))[463:448]
+tmp_dst[511:496] := (a &gt;&gt; (imm8[7:6] * 16))[463:448]
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="zmm {z}, zmm, imm8" xed="VPSHUFHW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the high 64 bits of 128-bit lanes of "dst", with the low 64 bits of 128-bit lanes being copied from "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+dst[191:128] := a[191:128]
+dst[207:192] := (a &gt;&gt; (imm8[1:0] * 16))[207:192]
+dst[223:208] := (a &gt;&gt; (imm8[3:2] * 16))[207:192]
+dst[239:224] := (a &gt;&gt; (imm8[5:4] * 16))[207:192]
+dst[255:240] := (a &gt;&gt; (imm8[7:6] * 16))[207:192]
+dst[319:256] := a[319:256]
+dst[335:320] := (a &gt;&gt; (imm8[1:0] * 16))[335:320]
+dst[351:336] := (a &gt;&gt; (imm8[3:2] * 16))[335:320]
+dst[367:352] := (a &gt;&gt; (imm8[5:4] * 16))[335:320]
+dst[383:368] := (a &gt;&gt; (imm8[7:6] * 16))[335:320]
+dst[447:384] := a[447:384]
+dst[463:448] := (a &gt;&gt; (imm8[1:0] * 16))[463:448]
+dst[479:464] := (a &gt;&gt; (imm8[3:2] * 16))[463:448]
+dst[495:480] := (a &gt;&gt; (imm8[5:4] * 16))[463:448]
+dst[511:496] := (a &gt;&gt; (imm8[7:6] * 16))[463:448]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="zmm, zmm, imm8" xed="VPSHUFHW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := a[63:0]
+tmp_dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+tmp_dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+tmp_dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+tmp_dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="xmm {k}, xmm, imm8" xed="VPSHUFHW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := a[63:0]
+tmp_dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+tmp_dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+tmp_dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+tmp_dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFHW" form="xmm {z}, xmm, imm8" xed="VPSHUFHW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+tmp_dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+tmp_dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+tmp_dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+tmp_dst[127:64] := a[127:64]
+tmp_dst[143:128] := (a &gt;&gt; (imm8[1:0] * 16))[143:128]
+tmp_dst[159:144] := (a &gt;&gt; (imm8[3:2] * 16))[143:128]
+tmp_dst[175:160] := (a &gt;&gt; (imm8[5:4] * 16))[143:128]
+tmp_dst[191:176] := (a &gt;&gt; (imm8[7:6] * 16))[143:128]
+tmp_dst[255:192] := a[255:192]
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="ymm {k}, ymm, imm8" xed="VPSHUFLW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+tmp_dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+tmp_dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+tmp_dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+tmp_dst[127:64] := a[127:64]
+tmp_dst[143:128] := (a &gt;&gt; (imm8[1:0] * 16))[143:128]
+tmp_dst[159:144] := (a &gt;&gt; (imm8[3:2] * 16))[143:128]
+tmp_dst[175:160] := (a &gt;&gt; (imm8[5:4] * 16))[143:128]
+tmp_dst[191:176] := (a &gt;&gt; (imm8[7:6] * 16))[143:128]
+tmp_dst[255:192] := a[255:192]
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="ymm {z}, ymm, imm8" xed="VPSHUFLW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+tmp_dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+tmp_dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+tmp_dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+tmp_dst[127:64] := a[127:64]
+tmp_dst[143:128] := (a &gt;&gt; (imm8[1:0] * 16))[143:128]
+tmp_dst[159:144] := (a &gt;&gt; (imm8[3:2] * 16))[143:128]
+tmp_dst[175:160] := (a &gt;&gt; (imm8[5:4] * 16))[143:128]
+tmp_dst[191:176] := (a &gt;&gt; (imm8[7:6] * 16))[143:128]
+tmp_dst[255:192] := a[255:192]
+tmp_dst[271:256] := (a &gt;&gt; (imm8[1:0] * 16))[271:256]
+tmp_dst[287:272] := (a &gt;&gt; (imm8[3:2] * 16))[271:256]
+tmp_dst[303:288] := (a &gt;&gt; (imm8[5:4] * 16))[271:256]
+tmp_dst[319:304] := (a &gt;&gt; (imm8[7:6] * 16))[271:256]
+tmp_dst[383:320] := a[383:320]
+tmp_dst[399:384] := (a &gt;&gt; (imm8[1:0] * 16))[399:384]
+tmp_dst[415:400] := (a &gt;&gt; (imm8[3:2] * 16))[399:384]
+tmp_dst[431:416] := (a &gt;&gt; (imm8[5:4] * 16))[399:384]
+tmp_dst[447:432] := (a &gt;&gt; (imm8[7:6] * 16))[399:384]
+tmp_dst[511:448] := a[511:448]
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="zmm {k}, zmm, imm8" xed="VPSHUFLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+tmp_dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+tmp_dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+tmp_dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+tmp_dst[127:64] := a[127:64]
+tmp_dst[143:128] := (a &gt;&gt; (imm8[1:0] * 16))[143:128]
+tmp_dst[159:144] := (a &gt;&gt; (imm8[3:2] * 16))[143:128]
+tmp_dst[175:160] := (a &gt;&gt; (imm8[5:4] * 16))[143:128]
+tmp_dst[191:176] := (a &gt;&gt; (imm8[7:6] * 16))[143:128]
+tmp_dst[255:192] := a[255:192]
+tmp_dst[271:256] := (a &gt;&gt; (imm8[1:0] * 16))[271:256]
+tmp_dst[287:272] := (a &gt;&gt; (imm8[3:2] * 16))[271:256]
+tmp_dst[303:288] := (a &gt;&gt; (imm8[5:4] * 16))[271:256]
+tmp_dst[319:304] := (a &gt;&gt; (imm8[7:6] * 16))[271:256]
+tmp_dst[383:320] := a[383:320]
+tmp_dst[399:384] := (a &gt;&gt; (imm8[1:0] * 16))[399:384]
+tmp_dst[415:400] := (a &gt;&gt; (imm8[3:2] * 16))[399:384]
+tmp_dst[431:416] := (a &gt;&gt; (imm8[5:4] * 16))[399:384]
+tmp_dst[447:432] := (a &gt;&gt; (imm8[7:6] * 16))[399:384]
+tmp_dst[511:448] := a[511:448]
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="zmm {z}, zmm, imm8" xed="VPSHUFLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from "a" to "dst".</description>
+	<operation>
+dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+dst[127:64] := a[127:64]
+dst[143:128] := (a &gt;&gt; (imm8[1:0] * 16))[143:128]
+dst[159:144] := (a &gt;&gt; (imm8[3:2] * 16))[143:128]
+dst[175:160] := (a &gt;&gt; (imm8[5:4] * 16))[143:128]
+dst[191:176] := (a &gt;&gt; (imm8[7:6] * 16))[143:128]
+dst[255:192] := a[255:192]
+dst[271:256] := (a &gt;&gt; (imm8[1:0] * 16))[271:256]
+dst[287:272] := (a &gt;&gt; (imm8[3:2] * 16))[271:256]
+dst[303:288] := (a &gt;&gt; (imm8[5:4] * 16))[271:256]
+dst[319:304] := (a &gt;&gt; (imm8[7:6] * 16))[271:256]
+dst[383:320] := a[383:320]
+dst[399:384] := (a &gt;&gt; (imm8[1:0] * 16))[399:384]
+dst[415:400] := (a &gt;&gt; (imm8[3:2] * 16))[399:384]
+dst[431:416] := (a &gt;&gt; (imm8[5:4] * 16))[399:384]
+dst[447:432] := (a &gt;&gt; (imm8[7:6] * 16))[399:384]
+dst[511:448] := a[511:448]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="zmm, zmm, imm8" xed="VPSHUFLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst", using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+tmp_dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+tmp_dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+tmp_dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+tmp_dst[127:64] := a[127:64]
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="xmm {k}, xmm, imm8" xed="VPSHUFLW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst", using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+tmp_dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+tmp_dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+tmp_dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+tmp_dst[127:64] := a[127:64]
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFLW" form="xmm {z}, xmm, imm8" xed="VPSHUFLW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_bslli_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 128-bit lanes in "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &lt;&lt; (tmp*8)
+dst[255:128] := a[255:128] &lt;&lt; (tmp*8)
+dst[383:256] := a[383:256] &lt;&lt; (tmp*8)
+dst[511:384] := a[511:384] &lt;&lt; (tmp*8)
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLDQ" form="zmm, zmm, imm8" xed="VPSLLDQ_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="ymm {k}, ymm, ymm" xed="VPSLLVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="ymm {z}, ymm, ymm" xed="VPSLLVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="ymm, ymm, ymm" xed="VPSLLVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="zmm {k}, zmm, zmm" xed="VPSLLVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="zmm {z}, zmm, zmm" xed="VPSLLVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="zmm, zmm, zmm" xed="VPSLLVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="xmm {k}, xmm, xmm" xed="VPSLLVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="xmm {z}, xmm, xmm" xed="VPSLLVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_sllv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVW" form="xmm, xmm, xmm" xed="VPSLLVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLW" form="ymm {k}, ymm, xmm" xed="VPSLLW_YMMu16_MASKmskw_YMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLW" form="ymm {k}, ymm, imm8" xed="VPSLLW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLW" form="ymm {z}, ymm, xmm" xed="VPSLLW_YMMu16_MASKmskw_YMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLW" form="ymm {z}, ymm, imm8" xed="VPSLLW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLW" form="zmm {k}, zmm, xmm" xed="VPSLLW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLW" form="zmm {k}, zmm, imm8" xed="VPSLLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLW" form="zmm {z}, zmm, xmm" xed="VPSLLW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLW" form="zmm {z}, zmm, imm8" xed="VPSLLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLW" form="zmm, zmm, xmm" xed="VPSLLW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLW" form="zmm, zmm, imm8" xed="VPSLLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLW" form="xmm {k}, xmm, xmm" xed="VPSLLW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLW" form="xmm {k}, xmm, imm8" xed="VPSLLW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sll_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLW" form="xmm {z}, xmm, xmm" xed="VPSLLW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_slli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLW" form="xmm {z}, xmm, imm8" xed="VPSLLW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="ymm {k}, ymm, ymm" xed="VPSRAVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="ymm {z}, ymm, ymm" xed="VPSRAVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="ymm, ymm, ymm" xed="VPSRAVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="zmm {k}, zmm, zmm" xed="VPSRAVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="zmm {z}, zmm, zmm" xed="VPSRAVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="zmm, zmm, zmm" xed="VPSRAVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="xmm {k}, xmm, xmm" xed="VPSRAVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="xmm {z}, xmm, xmm" xed="VPSRAVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_srav_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVW" form="xmm, xmm, xmm" xed="VPSRAVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAW" form="ymm {k}, ymm, xmm" xed="VPSRAW_YMMu16_MASKmskw_YMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAW" form="ymm {k}, ymm, imm8" xed="VPSRAW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAW" form="ymm {z}, ymm, xmm" xed="VPSRAW_YMMu16_MASKmskw_YMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAW" form="ymm {z}, ymm, imm8" xed="VPSRAW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAW" form="zmm {k}, zmm, xmm" xed="VPSRAW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAW" form="zmm {k}, zmm, imm8" xed="VPSRAW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAW" form="zmm {z}, zmm, xmm" xed="VPSRAW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAW" form="zmm {z}, zmm, imm8" xed="VPSRAW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAW" form="zmm, zmm, xmm" xed="VPSRAW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAW" form="zmm, zmm, imm8" xed="VPSRAW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAW" form="xmm {k}, xmm, xmm" xed="VPSRAW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAW" form="xmm {k}, xmm, imm8" xed="VPSRAW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sra_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAW" form="xmm {z}, xmm, xmm" xed="VPSRAW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srai_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+		ELSE
+			dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAW" form="xmm {z}, xmm, imm8" xed="VPSRAW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_bsrli_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &gt;&gt; (tmp*8)
+dst[255:128] := a[255:128] &gt;&gt; (tmp*8)
+dst[383:256] := a[383:256] &gt;&gt; (tmp*8)
+dst[511:384] := a[511:384] &gt;&gt; (tmp*8)
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLDQ" form="zmm, zmm, imm8" xed="VPSRLDQ_ZMMu8_ZMMu8_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="ymm {k}, ymm, ymm" xed="VPSRLVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="ymm {z}, ymm, ymm" xed="VPSRLVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="ymm, ymm, ymm" xed="VPSRLVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="zmm {k}, zmm, zmm" xed="VPSRLVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="zmm {z}, zmm, zmm" xed="VPSRLVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="zmm, zmm, zmm" xed="VPSRLVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="xmm {k}, xmm, xmm" xed="VPSRLVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[i+15:i] &lt; 16
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+		ELSE
+			dst[i+15:i] := 0
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="xmm {z}, xmm, xmm" xed="VPSRLVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_srlv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF count[i+15:i] &lt; 16
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVW" form="xmm, xmm, xmm" xed="VPSRLVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLW" form="ymm {k}, ymm, xmm" xed="VPSRLW_YMMu16_MASKmskw_YMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLW" form="ymm {k}, ymm, imm8" xed="VPSRLW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLW" form="ymm {z}, ymm, xmm" xed="VPSRLW_YMMu16_MASKmskw_YMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLW" form="ymm {z}, ymm, imm8" xed="VPSRLW_YMMu16_MASKmskw_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLW" form="zmm {k}, zmm, xmm" xed="VPSRLW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLW" form="zmm {k}, zmm, imm8" xed="VPSRLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLW" form="zmm {z}, zmm, xmm" xed="VPSRLW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLW" form="zmm {z}, zmm, imm8" xed="VPSRLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLW" form="zmm, zmm, xmm" xed="VPSRLW_ZMMu16_MASKmskw_ZMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLW" form="zmm, zmm, imm8" xed="VPSRLW_ZMMu16_MASKmskw_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLW" form="xmm {k}, xmm, xmm" xed="VPSRLW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLW" form="xmm {k}, xmm, imm8" xed="VPSRLW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srl_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF count[63:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLW" form="xmm {z}, xmm, xmm" xed="VPSRLW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srli_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		IF imm8[7:0] &gt; 15
+			dst[i+15:i] := 0
+		ELSE
+			dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLW" form="xmm {z}, xmm, imm8" xed="VPSRLW_XMMu16_MASKmskw_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] - b[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBB" form="ymm {k}, ymm, ymm" xed="VPSUBB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] - b[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBB" form="ymm {z}, ymm, ymm" xed="VPSUBB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] - b[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBB" form="zmm {k}, zmm, zmm" xed="VPSUBB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] - b[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBB" form="zmm {z}, zmm, zmm" xed="VPSUBB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := a[i+7:i] - b[i+7:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBB" form="zmm, zmm, zmm" xed="VPSUBB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] - b[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBB" form="xmm {k}, xmm, xmm" xed="VPSUBB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[i+7:i] - b[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBB" form="xmm {z}, xmm, xmm" xed="VPSUBB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="ymm {k}, ymm, ymm" xed="VPSUBSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="ymm {z}, ymm, ymm" xed="VPSUBSB_YMMi8_MASKmskw_YMMi8_YMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="zmm {k}, zmm, zmm" xed="VPSUBSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="zmm {z}, zmm, zmm" xed="VPSUBSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="zmm, zmm, zmm" xed="VPSUBSB_ZMMi8_MASKmskw_ZMMi8_ZMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="xmm {k}, xmm, xmm" xed="VPSUBSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_subs_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBSB" form="xmm {z}, xmm, xmm" xed="VPSUBSB_XMMi8_MASKmskw_XMMi8_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="ymm {k}, ymm, ymm" xed="VPSUBSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="ymm {z}, ymm, ymm" xed="VPSUBSW_YMMi16_MASKmskw_YMMi16_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="zmm {k}, zmm, zmm" xed="VPSUBSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="zmm {z}, zmm, zmm" xed="VPSUBSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="zmm, zmm, zmm" xed="VPSUBSW_ZMMi16_MASKmskw_ZMMi16_ZMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="xmm {k}, xmm, xmm" xed="VPSUBSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_subs_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBSW" form="xmm {z}, xmm, xmm" xed="VPSUBSW_XMMi16_MASKmskw_XMMi16_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="ymm {k}, ymm, ymm" xed="VPSUBUSB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="ymm {z}, ymm, ymm" xed="VPSUBUSB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="zmm {k}, zmm, zmm" xed="VPSUBUSB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="zmm {z}, zmm, zmm" xed="VPSUBUSB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="zmm, zmm, zmm" xed="VPSUBUSB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="xmm {k}, xmm, xmm" xed="VPSUBUSB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_subs_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBUSB" form="xmm {z}, xmm, xmm" xed="VPSUBUSB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="ymm {k}, ymm, ymm" xed="VPSUBUSW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="ymm {z}, ymm, ymm" xed="VPSUBUSW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="zmm {k}, zmm, zmm" xed="VPSUBUSW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="zmm {z}, zmm, zmm" xed="VPSUBUSW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="zmm, zmm, zmm" xed="VPSUBUSW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="xmm {k}, xmm, xmm" xed="VPSUBUSW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_subs_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBUSW" form="xmm {z}, xmm, xmm" xed="VPSUBUSW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] - b[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBW" form="ymm {k}, ymm, ymm" xed="VPSUBW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] - b[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBW" form="ymm {z}, ymm, ymm" xed="VPSUBW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] - b[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBW" form="zmm {k}, zmm, zmm" xed="VPSUBW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] - b[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBW" form="zmm {z}, zmm, zmm" xed="VPSUBW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := a[i+15:i] - b[i+15:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBW" form="zmm, zmm, zmm" xed="VPSUBW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] - b[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBW" form="xmm {k}, xmm, xmm" xed="VPSUBW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[i+15:i] - b[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBW" form="xmm {z}, xmm, xmm" xed="VPSUBW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_test_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTMB" form="k {k}, ymm, ymm" xed="VPTESTMB_MASKmskw_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_test_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTMB" form="k, ymm, ymm" xed="VPTESTMB_MASKmskw_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_test_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPTESTMB" form="k {k}, zmm, zmm" xed="VPTESTMB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_test_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPTESTMB" form="k, zmm, zmm" xed="VPTESTMB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_test_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTMB" form="k {k}, xmm, xmm" xed="VPTESTMB_MASKmskw_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_test_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ((a[i+7:i] AND b[i+7:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTMB" form="k, xmm, xmm" xed="VPTESTMB_MASKmskw_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_test_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTMW" form="k {k}, ymm, ymm" xed="VPTESTMW_MASKmskw_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_test_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTMW" form="k, ymm, ymm" xed="VPTESTMW_MASKmskw_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_test_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTMW" form="k {k}, zmm, zmm" xed="VPTESTMW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_test_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTMW" form="k, zmm, zmm" xed="VPTESTMW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_test_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTMW" form="k {k}, xmm, xmm" xed="VPTESTMW_MASKmskw_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_test_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ((a[i+15:i] AND b[i+15:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTMW" form="k, xmm, xmm" xed="VPTESTMW_MASKmskw_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_testn_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k1[j]
+		k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTNMB" form="k {k}, ymm, ymm" xed="VPTESTNMB_MASKmskw_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_testn_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTNMB" form="k, ymm, ymm" xed="VPTESTNMB_MASKmskw_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_testn_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k1[j]
+		k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPTESTNMB" form="k {k}, zmm, zmm" xed="VPTESTNMB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_testn_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:64] := 0
+	</operation>
+	<instruction name="VPTESTNMB" form="k, zmm, zmm" xed="VPTESTNMB_MASKmskw_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_testn_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k1[j]
+		k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTNMB" form="k {k}, xmm, xmm" xed="VPTESTNMB_MASKmskw_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_testn_epi8_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compute the bitwise AND of packed 8-bit integers in "a" and "b", producing intermediate 8-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	k[j] := ((a[i+7:i] AND b[i+7:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTNMB" form="k, xmm, xmm" xed="VPTESTNMB_MASKmskw_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_testn_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k1[j]
+		k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTNMW" form="k {k}, ymm, ymm" xed="VPTESTNMW_MASKmskw_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_testn_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTNMW" form="k, ymm, ymm" xed="VPTESTNMW_MASKmskw_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_testn_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k1[j]
+		k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTNMW" form="k {k}, zmm, zmm" xed="VPTESTNMW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_testn_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:32] := 0
+	</operation>
+	<instruction name="VPTESTNMW" form="k, zmm, zmm" xed="VPTESTNMW_MASKmskw_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_testn_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k1[j]
+		k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTNMW" form="k {k}, xmm, xmm" xed="VPTESTNMW_MASKmskw_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_testn_epi16_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compute the bitwise AND of packed 16-bit integers in "a" and "b", producing intermediate 16-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	k[j] := ((a[i+15:i] AND b[i+15:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTNMW" form="k, xmm, xmm" xed="VPTESTNMW_MASKmskw_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="ymm {k}, ymm, ymm" xed="VPUNPCKHBW_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="ymm {z}, ymm, ymm" xed="VPUNPCKHBW_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="zmm {k}, zmm, zmm" xed="VPUNPCKHBW_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="zmm {z}, zmm, zmm" xed="VPUNPCKHBW_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_HIGH_BYTES(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_HIGH_BYTES(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="zmm, zmm, zmm" xed="VPUNPCKHBW_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="xmm {k}, xmm, xmm" xed="VPUNPCKHBW_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHBW" form="xmm {z}, xmm, xmm" xed="VPUNPCKHBW_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="ymm {k}, ymm, ymm" xed="VPUNPCKHWD_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="ymm {z}, ymm, ymm" xed="VPUNPCKHWD_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="zmm {k}, zmm, zmm" xed="VPUNPCKHWD_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="zmm {z}, zmm, zmm" xed="VPUNPCKHWD_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_HIGH_WORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_HIGH_WORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="zmm, zmm, zmm" xed="VPUNPCKHWD_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="xmm {k}, xmm, xmm" xed="VPUNPCKHWD_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHWD" form="xmm {z}, xmm, xmm" xed="VPUNPCKHWD_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="ymm {k}, ymm, ymm" xed="VPUNPCKLBW_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="ymm {z}, ymm, ymm" xed="VPUNPCKLBW_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="zmm {k}, zmm, zmm" xed="VPUNPCKLBW_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384])
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="zmm {z}, zmm, zmm" xed="VPUNPCKLBW_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_BYTES(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_BYTES(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="zmm, zmm, zmm" xed="VPUNPCKLBW_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="xmm {k}, xmm, xmm" xed="VPUNPCKLBW_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := tmp_dst[i+7:i]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLBW" form="xmm {z}, xmm, xmm" xed="VPUNPCKLBW_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="ymm {k}, ymm, ymm" xed="VPUNPCKLWD_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="ymm {z}, ymm, ymm" xed="VPUNPCKLWD_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="zmm {k}, zmm, zmm" xed="VPUNPCKLWD_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384])
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="zmm {z}, zmm, zmm" xed="VPUNPCKLWD_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_WORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_WORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="zmm, zmm, zmm" xed="VPUNPCKLWD_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="xmm {k}, xmm, xmm" xed="VPUNPCKLWD_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := tmp_dst[i+15:i]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLWD" form="xmm {z}, xmm, xmm" xed="VPUNPCKLWD_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Store 512-bits (composed of 32 packed 16-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQU16" form="m512, zmm" xed="VMOVDQU16_MEMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Store 512-bits (composed of 64 packed 8-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQU8" form="m512, zmm" xed="VMOVDQU8_MEMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Store 256-bits (composed of 16 packed 16-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQU16" form="m256, ymm" xed="VMOVDQU16_MEMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Store 256-bits (composed of 32 packed 8-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQU8" form="m256, ymm" xed="VMOVDQU8_MEMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Store 128-bits (composed of 8 packed 16-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="VMOVDQU16" form="m128, xmm" xed="VMOVDQU16_MEMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Store 128-bits (composed of 16 packed 8-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="VMOVDQU8" form="m128, xmm" xed="VMOVDQU8_MEMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<description>Load 512-bits (composed of 32 packed 16-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="zmm, m512" xed="VMOVDQU16_ZMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<description>Load 512-bits (composed of 64 packed 8-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="zmm, m512" xed="VMOVDQU8_ZMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<description>Load 256-bits (composed of 16 packed 16-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="ymm, m256" xed="VMOVDQU16_YMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<description>Load 256-bits (composed of 32 packed 8-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="ymm, m256" xed="VMOVDQU8_YMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_loadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<description>Load 128-bits (composed of 8 packed 16-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU16" form="xmm, m128" xed="VMOVDQU16_XMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_loadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<description>Load 128-bits (composed of 16 packed 8-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU8" form="xmm, m128" xed="VMOVDQU8_XMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kadd_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Add 32-bit masks in "a" and "b", and store the result in "k".</description>
+	<operation>
+k[31:0] := a[31:0] + b[31:0]
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KADDD" form="k, k, k" xed="KADDD_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kadd_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Add 64-bit masks in "a" and "b", and store the result in "k".</description>
+	<operation>
+k[63:0] := a[63:0] + b[63:0]
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KADDQ" form="k, k, k" xed="KADDQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kand_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 32-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[31:0] := a[31:0] AND b[31:0]
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KANDD" form="k, k, k" xed="KANDD_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kand_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 64-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[63:0] := a[63:0] AND b[63:0]
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KANDQ" form="k, k, k" xed="KANDQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kandn_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 32-bit masks "a" and then AND with "b", and store the result in "k".</description>
+	<operation>
+k[31:0] := (NOT a[31:0]) AND b[31:0]
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KANDND" form="k, k, k" xed="KANDND_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kandn_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 64-bit masks "a" and then AND with "b", and store the result in "k".</description>
+	<operation>
+k[63:0] := (NOT a[63:0]) AND b[63:0]
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KANDNQ" form="k, k, k" xed="KANDNQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_knot_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<description>Compute the bitwise NOT of 32-bit mask "a", and store the result in "k".</description>
+	<operation>
+k[31:0] := NOT a[31:0]
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KNOTD" form="k, k" xed="KNOTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_knot_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<description>Compute the bitwise NOT of 64-bit mask "a", and store the result in "k".</description>
+	<operation>
+k[63:0] := NOT a[63:0]
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KNOTQ" form="k, k" xed="KNOTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kor_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 32-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[31:0] := a[31:0] OR b[31:0]
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KORD" form="k, k, k" xed="KORD_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kor_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 64-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[63:0] := a[63:0] OR b[63:0]
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KORQ" form="k, k, k" xed="KORQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxnor_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XNOR of 32-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[31:0] := NOT (a[31:0] XOR b[31:0])
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KXNORD" form="k, k, k" xed="KXNORD_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxnor_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XNOR of 64-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[63:0] := NOT (a[63:0] XOR b[63:0])
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KXNORQ" form="k, k, k" xed="KXNORQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxor_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XOR of 32-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[31:0] := a[31:0] XOR b[31:0]
+k[MAX:32] := 0
+	</operation>
+	<instruction name="KXORD" form="k, k, k" xed="KXORD_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxor_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XOR of 64-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[63:0] := a[63:0] XOR b[63:0]
+k[MAX:64] := 0
+	</operation>
+	<instruction name="KXORQ" form="k, k, k" xed="KXORQ_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftli_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 32-bit mask "a" left by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 31
+	k[31:0] := a[31:0] &lt;&lt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTLD" form="k, k, imm8" xed="KSHIFTLD_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftli_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 64-bit mask "a" left by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 63
+	k[63:0] := a[63:0] &lt;&lt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTLQ" form="k, k, imm8" xed="KSHIFTLQ_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftri_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 32-bit mask "a" right by "count" while shifting in zeros, and store the least significant 32 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 31
+	k[31:0] := a[31:0] &gt;&gt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTRD" form="k, k, imm8" xed="KSHIFTRD_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftri_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 64-bit mask "a" right by "count" while shifting in zeros, and store the least significant 64 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 63
+	k[63:0] := a[63:0] &gt;&gt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTRQ" form="k, k, imm8" xed="KSHIFTRQ_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_load_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__mmask32*" varname="mem_addr" etype="MASK" memwidth="32"/>
+	<description>Load 32-bit mask from memory into "k".</description>
+	<operation>
+k[31:0] := MEM[mem_addr+31:mem_addr]
+	</operation>
+	<instruction name="KMOVD" form="k, m32" xed="KMOVD_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_load_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Load</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__mmask64*" varname="mem_addr" etype="MASK" memwidth="64"/>
+	<description>Load 64-bit mask from memory into "k".</description>
+	<operation>
+k[63:0] := MEM[mem_addr+63:mem_addr]
+	</operation>
+	<instruction name="KMOVQ" form="k, m64" xed="KMOVQ_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_store_mask32">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__mmask32*" varname="mem_addr" etype="MASK" memwidth="32"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<description>Store 32-bit mask from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+	</operation>
+	<instruction name="KMOVD" form="m32, k" xed="KMOVD_MEMu32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_store_mask64">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__mmask64*" varname="mem_addr" etype="MASK" memwidth="64"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<description>Store 64-bit mask from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="KMOVQ" form="m64, k" xed="KMOVQ_MEMu64_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortest_mask32_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="all_ones" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones".</description>
+	<operation>
+tmp[31:0] := a[31:0] OR b[31:0]
+IF tmp[31:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+IF tmp[31:0] == 0xFFFFFFFF
+	MEM[all_ones+7:all_ones] := 1
+ELSE
+	MEM[all_ones+7:all_ones] := 0
+FI
+	</operation>
+	<instruction name="KORTESTD" form="k, k" xed="KORTESTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestz_mask32_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[31:0] := a[31:0] OR b[31:0]
+IF tmp[31:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTD" form="k, k" xed="KORTESTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestc_mask32_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 32-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[31:0] := a[31:0] OR b[31:0]
+IF tmp[31:0] == 0xFFFFFFFF
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTD" form="k, k" xed="KORTESTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortest_mask64_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="all_ones" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones".</description>
+	<operation>
+tmp[63:0] := a[63:0] OR b[63:0]
+IF tmp[63:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF
+	MEM[all_ones+7:all_ones] := 1
+ELSE
+	MEM[all_ones+7:all_ones] := 0
+FI
+	</operation>
+	<instruction name="KORTESTQ" form="k, k" xed="KORTESTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestz_mask64_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[63:0] := a[63:0] OR b[63:0]
+IF tmp[63:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTQ" form="k, k" xed="KORTESTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestc_mask64_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 64-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[63:0] := a[63:0] OR b[63:0]
+IF tmp[63:0] == 0xFFFFFFFFFFFFFFFF
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTQ" form="k, k" xed="KORTESTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktest_mask32_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="and_not" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not".</description>
+	<operation>
+tmp1[31:0] := a[31:0] AND b[31:0]
+IF tmp1[31:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+tmp2[31:0] := (NOT a[31:0]) AND b[31:0]
+IF tmp2[31:0] == 0x0
+	MEM[and_not+7:and_not] := 1
+ELSE
+	MEM[and_not+7:and_not] := 0
+FI
+	</operation>
+	<instruction name="KTESTD" form="k, k" xed="KTESTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestz_mask32_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 32-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[31:0] := a[31:0] AND b[31:0]
+IF tmp[31:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTD" form="k, k" xed="KTESTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestc_mask32_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<parameter type="__mmask32" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 32-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[31:0] := (NOT a[31:0]) AND b[31:0]
+IF tmp[31:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTD" form="k, k" xed="KTESTD_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktest_mask64_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="and_not" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b", if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not".</description>
+	<operation>
+tmp1[63:0] := a[63:0] AND b[63:0]
+IF tmp1[63:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+tmp2[63:0] := (NOT a[63:0]) AND b[63:0]
+IF tmp2[63:0] == 0x0
+	MEM[and_not+7:and_not] := 1
+ELSE
+	MEM[and_not+7:and_not] := 0
+FI
+	</operation>
+	<instruction name="KTESTQ" form="k, k" xed="KTESTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestz_mask64_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 64-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[63:0] := a[63:0] AND b[63:0]
+IF tmp[63:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTQ" form="k, k" xed="KTESTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestc_mask64_u8">
+	<type>Mask</type>
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<parameter type="__mmask64" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 64-bit mask "a" and then AND with "b", if the result is all zeroes, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[63:0] := (NOT a[63:0]) AND b[63:0]
+IF tmp[63:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTQ" form="k, k" xed="KTESTQ_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtmask32_u32">
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask32" varname="a" etype="MASK"/>
+	<description>Convert 32-bit mask "a" into an integer value, and store the result in "dst".</description>
+	<operation>
+dst := ZeroExtend32(a[31:0])
+	</operation>
+	<instruction name="KMOVD" form="r32, k" xed="KMOVD_GPR32u32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtmask64_u64">
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask64" varname="a" etype="MASK"/>
+	<description>Convert 64-bit mask "a" into an integer value, and store the result in "dst".</description>
+	<operation>
+dst := ZeroExtend64(a[63:0])
+	</operation>
+	<instruction name="KMOVQ" form="r64, k" xed="KMOVQ_GPR64u64_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtu32_mask32">
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Convert integer value "a" into a 32-bit mask, and store the result in "k".</description>
+	<operation>
+k := ZeroExtend32(a[31:0])
+	</operation>
+	<instruction name="KMOVD" form="k, r32" xed="KMOVD_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtu64_mask64">
+	<CPUID>AVX512BW</CPUID>
+	<category>Mask</category>
+	<return type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Convert integer value "a" into a 64-bit mask, and store the result in "k".</description>
+	<operation>
+k := ZeroExtend64(a[63:0])
+	</operation>
+	<instruction name="KMOVQ" form="k, r64" xed="KMOVQ_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_broadcastmb_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Broadcast the low 8-bits from input mask "k" to all 64-bit elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ZeroExtend64(k[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTMB2Q" form="ymm" xed="VPBROADCASTMB2Q_YMMu64_MASKu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_broadcastmb_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Broadcast the low 8-bits from input mask "k" to all 64-bit elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ZeroExtend64(k[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTMB2Q" form="xmm" xed="VPBROADCASTMB2Q_XMMu64_MASKu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_broadcastmw_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Broadcast the low 16-bits from input mask "k" to all 32-bit elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ZeroExtend32(k[15:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTMW2D" form="ymm" xed="VPBROADCASTMW2D_YMMu32_MASKu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_broadcastmw_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Broadcast the low 16-bits from input mask "k" to all 32-bit elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ZeroExtend32(k[15:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTMW2D" form="xmm" xed="VPBROADCASTMW2D_XMMu32_MASKu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	FOR k := 0 to j-1
+		m := k*32
+		dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+	ENDFOR
+	dst[i+31:i+j] := 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="ymm, ymm" xed="VPCONFLICTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*32
+			dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+		ENDFOR
+		dst[i+31:i+j] := 0
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="ymm {k}, ymm" xed="VPCONFLICTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*32
+			dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+		ENDFOR
+		dst[i+31:i+j] := 0
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="ymm {z}, ymm" xed="VPCONFLICTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	FOR k := 0 to j-1
+		m := k*32
+		dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+	ENDFOR
+	dst[i+31:i+j] := 0
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="xmm, xmm" xed="VPCONFLICTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*32
+			dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+		ENDFOR
+		dst[i+31:i+j] := 0
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="xmm {k}, xmm" xed="VPCONFLICTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*32
+			dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+		ENDFOR
+		dst[i+31:i+j] := 0
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="xmm {z}, xmm" xed="VPCONFLICTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	FOR k := 0 to j-1
+		m := k*64
+		dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+	ENDFOR
+	dst[i+63:i+j] := 0
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="ymm, ymm" xed="VPCONFLICTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*64
+			dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+		ENDFOR
+		dst[i+63:i+j] := 0
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="ymm {k}, ymm" xed="VPCONFLICTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*64
+			dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+		ENDFOR
+		dst[i+63:i+j] := 0
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="ymm {z}, ymm" xed="VPCONFLICTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	FOR k := 0 to j-1
+		m := k*64
+		dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+	ENDFOR
+	dst[i+63:i+j] := 0
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="xmm, xmm" xed="VPCONFLICTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*64
+			dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+		ENDFOR
+		dst[i+63:i+j] := 0
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="xmm {k}, xmm" xed="VPCONFLICTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*64
+			dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+		ENDFOR
+		dst[i+63:i+j] := 0
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="xmm {z}, xmm" xed="VPCONFLICTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	tmp := 31
+	dst[i+31:i] := 0
+	DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+		tmp := tmp - 1
+		dst[i+31:i] := dst[i+31:i] + 1
+	OD
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="ymm, ymm" xed="VPLZCNTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp := 31
+		dst[i+31:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+31:i] := dst[i+31:i] + 1
+		OD
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="ymm {k}, ymm" xed="VPLZCNTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp := 31
+		dst[i+31:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+31:i] := dst[i+31:i] + 1
+		OD
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="ymm {z}, ymm" xed="VPLZCNTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	tmp := 31
+	dst[i+31:i] := 0
+	DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+		tmp := tmp - 1
+		dst[i+31:i] := dst[i+31:i] + 1
+	OD
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="xmm, xmm" xed="VPLZCNTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp := 31
+		dst[i+31:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+31:i] := dst[i+31:i] + 1
+		OD
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="xmm {k}, xmm" xed="VPLZCNTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp := 31
+		dst[i+31:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+31:i] := dst[i+31:i] + 1
+		OD
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="xmm {z}, xmm" xed="VPLZCNTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	tmp := 63
+	dst[i+63:i] := 0
+	DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+		tmp := tmp - 1
+		dst[i+63:i] := dst[i+63:i] + 1
+	OD
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="ymm, ymm" xed="VPLZCNTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp := 63
+		dst[i+63:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+63:i] := dst[i+63:i] + 1
+		OD
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="ymm {k}, ymm" xed="VPLZCNTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp := 63
+		dst[i+63:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+63:i] := dst[i+63:i] + 1
+		OD
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="ymm {z}, ymm" xed="VPLZCNTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	tmp := 63
+	dst[i+63:i] := 0
+	DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+		tmp := tmp - 1
+		dst[i+63:i] := dst[i+63:i] + 1
+	OD
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="xmm, xmm" xed="VPLZCNTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp := 63
+		dst[i+63:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+63:i] := dst[i+63:i] + 1
+		OD
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="xmm {k}, xmm" xed="VPLZCNTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp := 63
+		dst[i+63:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+63:i] := dst[i+63:i] + 1
+		OD
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="xmm {z}, xmm" xed="VPLZCNTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastmb_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Broadcast the low 8-bits from input mask "k" to all 64-bit elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ZeroExtend64(k[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTMB2Q" form="zmm" xed="VPBROADCASTMB2Q_ZMMu64_MASKu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastmw_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Broadcast the low 16-bits from input mask "k" to all 32-bit elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ZeroExtend32(k[15:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTMW2D" form="zmm" xed="VPBROADCASTMW2D_ZMMu32_MASKu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	FOR k := 0 to j-1
+		m := k*32
+		dst[i+k] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+	ENDFOR
+	dst[i+31:i+j] := 0
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="zmm, zmm" xed="VPCONFLICTD_ZMMu32_MASKmskw_ZMMu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*32
+			dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+		ENDFOR
+		dst[i+31:i+j] := 0
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="zmm {k}, zmm" xed="VPCONFLICTD_ZMMu32_MASKmskw_ZMMu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_conflict_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Test each 32-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*32
+			dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0
+		ENDFOR
+		dst[i+31:i+j] := 0
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCONFLICTD" form="zmm {z}, zmm" xed="VPCONFLICTD_ZMMu32_MASKmskw_ZMMu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	FOR k := 0 to j-1
+		m := k*64
+		dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+	ENDFOR
+	dst[i+63:i+j] := 0
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="zmm, zmm" xed="VPCONFLICTQ_ZMMu64_MASKmskw_ZMMu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*64
+			dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+		ENDFOR
+		dst[i+63:i+j] := 0
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="zmm {k}, zmm" xed="VPCONFLICTQ_ZMMu64_MASKmskw_ZMMu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_conflict_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Compare</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Test each 64-bit element of "a" for equality with all other elements in "a" closer to the least significant bit using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		FOR l := 0 to j-1
+			m := l*64
+			dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
+		ENDFOR
+		dst[i+63:i+j] := 0
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCONFLICTQ" form="zmm {z}, zmm" xed="VPCONFLICTQ_ZMMu64_MASKmskw_ZMMu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	tmp := 31
+	dst[i+31:i] := 0
+	DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+		tmp := tmp - 1
+		dst[i+31:i] := dst[i+31:i] + 1
+	OD
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="zmm, zmm" xed="VPLZCNTD_ZMMu32_MASKmskw_ZMMu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp := 31
+		dst[i+31:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+31:i] := dst[i+31:i] + 1
+		OD
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="zmm {k}, zmm" xed="VPLZCNTD_ZMMu32_MASKmskw_ZMMu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_lzcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in each packed 32-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp := 31
+		dst[i+31:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+31:i] := dst[i+31:i] + 1
+		OD
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPLZCNTD" form="zmm {z}, zmm" xed="VPLZCNTD_ZMMu32_MASKmskw_ZMMu32_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	tmp := 63
+	dst[i+63:i] := 0
+	DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+		tmp := tmp - 1
+		dst[i+63:i] := dst[i+63:i] + 1
+	OD
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="zmm, zmm" xed="VPLZCNTQ_ZMMu64_MASKmskw_ZMMu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp := 63
+		dst[i+63:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+63:i] := dst[i+63:i] + 1
+		OD
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="zmm {k}, zmm" xed="VPLZCNTQ_ZMMu64_MASKmskw_ZMMu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_lzcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512CD</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in each packed 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp := 63
+		dst[i+63:i] := 0
+		DO WHILE (tmp &gt;= 0 AND a[i+tmp] == 0)
+			tmp := tmp - 1
+			dst[i+63:i] := dst[i+63:i] + 1
+		OD
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPLZCNTQ" form="zmm {z}, zmm" xed="VPLZCNTQ_ZMMu64_MASKmskw_ZMMu64_AVX512CD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDNPD" form="ymm {k}, ymm, ymm" xed="VANDNPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDNPD" form="ymm {z}, ymm, ymm" xed="VANDNPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDNPD" form="zmm, zmm, zmm" xed="VANDNPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDNPD" form="zmm {k}, zmm, zmm" xed="VANDNPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDNPD" form="zmm {z}, zmm, zmm" xed="VANDNPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDNPD" form="xmm {k}, xmm, xmm" xed="VANDNPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDNPD" form="xmm {z}, xmm, xmm" xed="VANDNPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDNPS" form="ymm {k}, ymm, ymm" xed="VANDNPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDNPS" form="ymm {z}, ymm, ymm" xed="VANDNPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDNPS" form="zmm, zmm, zmm" xed="VANDNPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDNPS" form="zmm {k}, zmm, zmm" xed="VANDNPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDNPS" form="zmm {z}, zmm, zmm" xed="VANDNPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDNPS" form="xmm {k}, xmm, xmm" xed="VANDNPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDNPS" form="xmm {z}, xmm, xmm" xed="VANDNPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDPD" form="ymm {k}, ymm, ymm" xed="VANDPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDPD" form="ymm {z}, ymm, ymm" xed="VANDPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDPD" form="zmm, zmm, zmm" xed="VANDPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDPD" form="zmm {k}, zmm, zmm" xed="VANDPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDPD" form="zmm {z}, zmm, zmm" xed="VANDPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDPD" form="xmm {k}, xmm, xmm" xed="VANDPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_and_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDPD" form="xmm {z}, xmm, xmm" xed="VANDPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDPS" form="ymm {k}, ymm, ymm" xed="VANDPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VANDPS" form="ymm {z}, ymm, ymm" xed="VANDPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDPS" form="zmm, zmm, zmm" xed="VANDPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDPS" form="zmm {k}, zmm, zmm" xed="VANDPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VANDPS" form="zmm {z}, zmm, zmm" xed="VANDPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDPS" form="xmm {k}, xmm, xmm" xed="VANDPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_and_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VANDPS" form="xmm {z}, xmm, xmm" xed="VANDPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_broadcast_f32x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X2" form="ymm, xmm" xed="VBROADCASTF32X2_YMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcast_f32x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X2" form="ymm {k}, xmm" xed="VBROADCASTF32X2_YMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcast_f32x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X2" form="ymm {z}, xmm" xed="VBROADCASTF32X2_YMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcast_f32x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X2" form="zmm, xmm" xed="VBROADCASTF32X2_ZMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcast_f32x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X2" form="zmm {k}, xmm" xed="VBROADCASTF32X2_ZMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcast_f32x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X2" form="zmm {z}, xmm" xed="VBROADCASTF32X2_ZMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_broadcast_f32x8">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 8)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X8" form="zmm, m256" xed="VBROADCASTF32X8_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_mask_broadcast_f32x8">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 8)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X8" form="zmm {k}, m256" xed="VBROADCASTF32X8_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_maskz_broadcast_f32x8">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Broadcast the 8 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 8)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X8" form="zmm {z}, m256" xed="VBROADCASTF32X8_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_broadcast_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	n := (j % 2)*64
+	dst[i+63:i] := a[n+63:n]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X2" form="ymm, m128" xed="VBROADCASTF64X2_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_mask_broadcast_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X2" form="ymm {k}, m128" xed="VBROADCASTF64X2_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_maskz_broadcast_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X2" form="ymm {z}, m128" xed="VBROADCASTF64X2_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_broadcast_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 2)*64
+	dst[i+63:i] := a[n+63:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X2" form="zmm, m128" xed="VBROADCASTF64X2_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_mask_broadcast_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X2" form="zmm {k}, m128" xed="VBROADCASTF64X2_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_maskz_broadcast_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the 2 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X2" form="zmm {z}, m128" xed="VBROADCASTF64X2_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_broadcast_i32x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="ymm, xmm" xed="VBROADCASTI32X2_YMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcast_i32x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="ymm {k}, xmm" xed="VBROADCASTI32X2_YMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcast_i32x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="ymm {z}, xmm" xed="VBROADCASTI32X2_YMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcast_i32x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="zmm, xmm" xed="VBROADCASTI32X2_ZMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcast_i32x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="zmm {k}, xmm" xed="VBROADCASTI32X2_ZMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcast_i32x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="zmm {z}, xmm" xed="VBROADCASTI32X2_ZMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_broadcast_i32x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	n := (j % 2)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="xmm, xmm" xed="VBROADCASTI32X2_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_broadcast_i32x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="xmm {k}, xmm" xed="VBROADCASTI32X2_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_broadcast_i32x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the lower 2 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	n := (j % 2)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X2" form="xmm {z}, xmm" xed="VBROADCASTI32X2_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_broadcast_i32x8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 8)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X8" form="zmm, m256" xed="VBROADCASTI32X8_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_mask_broadcast_i32x8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 8)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X8" form="zmm {k}, m256" xed="VBROADCASTI32X8_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_maskz_broadcast_i32x8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Broadcast the 8 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 8)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X8" form="zmm {z}, m256" xed="VBROADCASTI32X8_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_broadcast_i64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	n := (j % 2)*64
+	dst[i+63:i] := a[n+63:n]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X2" form="ymm, m128" xed="VBROADCASTI64X2_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_mask_broadcast_i64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X2" form="ymm {k}, m128" xed="VBROADCASTI64X2_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_maskz_broadcast_i64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X2" form="ymm {z}, m128" xed="VBROADCASTI64X2_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_broadcast_i64x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 2)*64
+	dst[i+63:i] := a[n+63:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X2" form="zmm, m128" xed="VBROADCASTI64X2_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_mask_broadcast_i64x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X2" form="zmm {k}, m128" xed="VBROADCASTI64X2_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_maskz_broadcast_i64x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the 2 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 2)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X2" form="zmm {z}, m128" xed="VBROADCASTI64X2_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="ymm, ymm" xed="VCVTPD2QQ_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="ymm {k}, ymm" xed="VCVTPD2QQ_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="ymm {z}, ymm" xed="VCVTPD2QQ_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="zmm, zmm {er}" xed="VCVTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="zmm, zmm" xed="VCVTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="zmm {k}, zmm {er}" xed="VCVTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="zmm {k}, zmm" xed="VCVTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="zmm {z}, zmm {er}" xed="VCVTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="zmm {z}, zmm" xed="VCVTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="xmm, xmm" xed="VCVTPD2QQ_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="xmm {k}, xmm" xed="VCVTPD2QQ_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2QQ" form="xmm {z}, xmm" xed="VCVTPD2QQ_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="ymm, ymm" xed="VCVTPD2UQQ_YMMu64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="ymm {k}, ymm" xed="VCVTPD2UQQ_YMMu64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="ymm {z}, ymm" xed="VCVTPD2UQQ_YMMu64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="zmm, zmm {er}" xed="VCVTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="zmm, zmm" xed="VCVTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="zmm {k}, zmm {er}" xed="VCVTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="zmm {k}, zmm" xed="VCVTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="zmm {z}, zmm {er}" xed="VCVTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="zmm {z}, zmm" xed="VCVTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="xmm, xmm" xed="VCVTPD2UQQ_XMMu64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="xmm {k}, xmm" xed="VCVTPD2UQQ_XMMu64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2UQQ" form="xmm {z}, xmm" xed="VCVTPD2UQQ_XMMu64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="ymm, xmm" xed="VCVTPS2QQ_YMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="ymm {k}, xmm" xed="VCVTPS2QQ_YMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="ymm {z}, xmm" xed="VCVTPS2QQ_YMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="zmm, ymm {er}" xed="VCVTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="zmm, ymm" xed="VCVTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	 [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="zmm {k}, ymm {er}" xed="VCVTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="zmm {k}, ymm" xed="VCVTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="zmm {z}, ymm {er}" xed="VCVTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="zmm {z}, ymm" xed="VCVTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="xmm, xmm" xed="VCVTPS2QQ_XMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="xmm {k}, xmm" xed="VCVTPS2QQ_XMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2QQ" form="xmm {z}, xmm" xed="VCVTPS2QQ_XMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="ymm, xmm" xed="VCVTPS2UQQ_YMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="ymm {k}, xmm" xed="VCVTPS2UQQ_YMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="ymm {z}, xmm" xed="VCVTPS2UQQ_YMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="zmm, ymm {er}" xed="VCVTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="zmm, ymm" xed="VCVTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="zmm {k}, ymm {er}" xed="VCVTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="zmm {k}, ymm" xed="VCVTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="zmm {z}, ymm {er}" xed="VCVTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="zmm {z}, ymm" xed="VCVTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="xmm, xmm" xed="VCVTPS2UQQ_XMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="xmm {k}, xmm" xed="VCVTPS2UQQ_XMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2UQQ" form="xmm {z}, xmm" xed="VCVTPS2UQQ_XMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="ymm, ymm" xed="VCVTQQ2PD_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="ymm {k}, ymm" xed="VCVTQQ2PD_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="ymm {z}, ymm" xed="VCVTQQ2PD_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="zmm, zmm {er}" xed="VCVTQQ2PD_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="zmm, zmm" xed="VCVTQQ2PD_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="zmm {k}, zmm {er}" xed="VCVTQQ2PD_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="zmm {k}, zmm" xed="VCVTQQ2PD_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="zmm {z}, zmm {er}" xed="VCVTQQ2PD_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="zmm {z}, zmm" xed="VCVTQQ2PD_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="xmm, xmm" xed="VCVTQQ2PD_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="xmm {k}, xmm" xed="VCVTQQ2PD_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTQQ2PD" form="xmm {z}, xmm" xed="VCVTQQ2PD_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="xmm, ymm" xed="VCVTQQ2PS_XMMf32_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="xmm {k}, ymm" xed="VCVTQQ2PS_XMMf32_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="xmm {z}, ymm" xed="VCVTQQ2PS_XMMf32_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="ymm, zmm {er}" xed="VCVTQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="ymm, zmm" xed="VCVTQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="ymm {k}, zmm {er}" xed="VCVTQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="ymm {k}, zmm" xed="VCVTQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="ymm {z}, zmm {er}" xed="VCVTQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="ymm {z}, zmm" xed="VCVTQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="xmm, xmm" xed="VCVTQQ2PS_XMMf32_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="xmm {k}, xmm" xed="VCVTQQ2PS_XMMf32_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTQQ2PS" form="xmm {z}, xmm" xed="VCVTQQ2PS_XMMf32_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="ymm, ymm" xed="VCVTTPD2QQ_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="ymm {k}, ymm" xed="VCVTTPD2QQ_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="ymm {z}, ymm" xed="VCVTTPD2QQ_YMMi64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="zmm, zmm {sae}" xed="VCVTTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="zmm, zmm" xed="VCVTTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="zmm {k}, zmm {sae}" xed="VCVTTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="zmm {k}, zmm" xed="VCVTTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="zmm {z}, zmm {sae}" xed="VCVTTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="zmm {z}, zmm" xed="VCVTTPD2QQ_ZMMi64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="xmm, xmm" xed="VCVTTPD2QQ_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="xmm {k}, xmm" xed="VCVTTPD2QQ_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttpd_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_Int64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2QQ" form="xmm {z}, xmm" xed="VCVTTPD2QQ_XMMi64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="ymm, ymm" xed="VCVTTPD2UQQ_YMMu64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="ymm {k}, ymm" xed="VCVTTPD2UQQ_YMMu64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="ymm {z}, ymm" xed="VCVTTPD2UQQ_YMMu64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="zmm, zmm {sae}" xed="VCVTTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="zmm, zmm" xed="VCVTTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="zmm {k}, zmm {sae}" xed="VCVTTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="zmm {k}, zmm" xed="VCVTTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="zmm {z}, zmm {sae}" xed="VCVTTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="zmm {z}, zmm" xed="VCVTTPD2UQQ_ZMMu64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="xmm, xmm" xed="VCVTTPD2UQQ_XMMu64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="xmm {k}, xmm" xed="VCVTTPD2UQQ_XMMu64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttpd_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_FP64_To_UInt64_Truncate(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2UQQ" form="xmm {z}, xmm" xed="VCVTTPD2UQQ_XMMu64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="ymm, xmm" xed="VCVTTPS2QQ_YMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="ymm {k}, xmm" xed="VCVTTPS2QQ_YMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="ymm {z}, xmm" xed="VCVTTPS2QQ_YMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="zmm, ymm {sae}" xed="VCVTTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="zmm, ymm" xed="VCVTTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="zmm {k}, ymm {sae}" xed="VCVTTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="zmm {k}, ymm" xed="VCVTTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="zmm {z}, ymm {sae}" xed="VCVTTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="zmm {z}, ymm" xed="VCVTTPS2QQ_ZMMi64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="xmm, xmm" xed="VCVTTPS2QQ_XMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="xmm {k}, xmm" xed="VCVTTPS2QQ_XMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttps_epi64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_Int64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2QQ" form="xmm {z}, xmm" xed="VCVTTPS2QQ_XMMi64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="ymm, xmm" xed="VCVTTPS2UQQ_YMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="ymm {k}, xmm" xed="VCVTTPS2UQQ_YMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="ymm {z}, xmm" xed="VCVTTPS2UQQ_YMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="zmm, ymm {sae}" xed="VCVTTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="zmm, ymm" xed="VCVTTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="zmm {k}, ymm {sae}" xed="VCVTTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="zmm {k}, ymm" xed="VCVTTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="zmm {z}, ymm {sae}" xed="VCVTTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="zmm {z}, ymm" xed="VCVTTPS2UQQ_ZMMu64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="xmm, xmm" xed="VCVTTPS2UQQ_XMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="xmm {k}, xmm" xed="VCVTTPS2UQQ_XMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttps_epu64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 64-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_UInt64_Truncate(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2UQQ" form="xmm {z}, xmm" xed="VCVTTPS2UQQ_XMMu64_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="ymm, ymm" xed="VCVTUQQ2PD_YMMf64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="ymm {k}, ymm" xed="VCVTUQQ2PD_YMMf64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="ymm {z}, ymm" xed="VCVTUQQ2PD_YMMf64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="zmm, zmm {er}" xed="VCVTUQQ2PD_ZMMf64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="zmm, zmm" xed="VCVTUQQ2PD_ZMMf64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="zmm {k}, zmm {er}" xed="VCVTUQQ2PD_ZMMf64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="zmm {k}, zmm" xed="VCVTUQQ2PD_ZMMf64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="zmm {z}, zmm {er}" xed="VCVTUQQ2PD_ZMMf64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="zmm {z}, zmm" xed="VCVTUQQ2PD_ZMMf64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="xmm, xmm" xed="VCVTUQQ2PD_XMMf64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="xmm {k}, xmm" xed="VCVTUQQ2PD_XMMf64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu64_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := Convert_Int64_To_FP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PD" form="xmm {z}, xmm" xed="VCVTUQQ2PD_XMMf64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="xmm, ymm" xed="VCVTUQQ2PS_XMMf32_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="xmm {k}, ymm" xed="VCVTUQQ2PS_XMMf32_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="xmm {z}, ymm" xed="VCVTUQQ2PS_XMMf32_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="ymm, zmm {er}" xed="VCVTUQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="ymm, zmm" xed="VCVTUQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="ymm {k}, zmm {er}" xed="VCVTUQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="ymm {k}, zmm" xed="VCVTUQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="ymm {z}, zmm {er}" xed="VCVTUQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="ymm {z}, zmm" xed="VCVTUQQ2PS_YMMf32_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="xmm, xmm" xed="VCVTUQQ2PS_XMMf32_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="xmm {k}, xmm" xed="VCVTUQQ2PS_XMMf32_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu64_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_Int64_To_FP32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTUQQ2PS" form="xmm {z}, xmm" xed="VCVTUQQ2PS_XMMf32_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extractf32x8_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[255:0] := a[255:0]
+1: dst[255:0] := a[511:256]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X8" form="ymm, zmm, imm8" xed="VEXTRACTF32X8_YMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extractf32x8_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X8" form="ymm {k}, zmm, imm8" xed="VEXTRACTF32X8_YMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extractf32x8_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X8" form="ymm {z}, zmm, imm8" xed="VEXTRACTF32X8_YMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_extractf64x2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X2" form="xmm, ymm, imm8" xed="VEXTRACTF64X2_XMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_extractf64x2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X2" form="xmm {k}, ymm, imm8" xed="VEXTRACTF64X2_XMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_extractf64x2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X2" form="xmm {z}, ymm, imm8" xed="VEXTRACTF64X2_XMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extractf64x2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[1:0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+2: dst[127:0] := a[383:256]
+3: dst[127:0] := a[511:384]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X2" form="xmm, zmm, imm8" xed="VEXTRACTF64X2_XMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extractf64x2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X2" form="xmm {k}, zmm, imm8" xed="VEXTRACTF64X2_XMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extractf64x2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X2" form="xmm {z}, zmm, imm8" xed="VEXTRACTF64X2_XMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extracti32x8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[255:0] := a[255:0]
+1: dst[255:0] := a[511:256]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X8" form="ymm, zmm, imm8" xed="VEXTRACTI32X8_YMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extracti32x8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X8" form="ymm {k}, zmm, imm8" xed="VEXTRACTI32X8_YMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extracti32x8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 8 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X8" form="ymm {z}, zmm, imm8" xed="VEXTRACTI32X8_YMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_extracti64x2_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X2" form="xmm, ymm, imm8" xed="VEXTRACTI64X2_XMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_extracti64x2_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X2" form="xmm {k}, ymm, imm8" xed="VEXTRACTI64X2_XMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_extracti64x2_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X2" form="xmm {z}, ymm, imm8" xed="VEXTRACTI64X2_XMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extracti64x2_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[1:0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+2: dst[127:0] := a[383:256]
+3: dst[127:0] := a[511:384]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X2" form="xmm, zmm, imm8" xed="VEXTRACTI64X2_XMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extracti64x2_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X2" form="xmm {k}, zmm, imm8" xed="VEXTRACTI64X2_XMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extracti64x2_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 2 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X2" form="xmm {z}, zmm, imm8" xed="VEXTRACTI64X2_XMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_fpclass_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k".
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0])
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VFPCLASSPD" form="k, ymm, imm8" xed="VFPCLASSPD_MASKmskw_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fpclass_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0])
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VFPCLASSPD" form="k {k}, ymm, imm8" xed="VFPCLASSPD_MASKmskw_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fpclass_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k".
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0])
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VFPCLASSPD" form="k, zmm, imm8" xed="VFPCLASSPD_MASKmskw_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fpclass_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0])
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VFPCLASSPD" form="k {k}, zmm, imm8" xed="VFPCLASSPD_MASKmskw_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fpclass_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k".
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0])
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VFPCLASSPD" form="k, xmm, imm8" xed="VFPCLASSPD_MASKmskw_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fpclass_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed double-precision (64-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := CheckFPClass_FP64(a[i+63:i], imm8[7:0])
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VFPCLASSPD" form="k {k}, xmm, imm8" xed="VFPCLASSPD_MASKmskw_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_fpclass_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k".
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0])
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VFPCLASSPS" form="k, ymm, imm8" xed="VFPCLASSPS_MASKmskw_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fpclass_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0])
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VFPCLASSPS" form="k {k}, ymm, imm8" xed="VFPCLASSPS_MASKmskw_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fpclass_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k".
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0])
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VFPCLASSPS" form="k, zmm, imm8" xed="VFPCLASSPS_MASKmskw_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fpclass_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0])
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VFPCLASSPS" form="k {k}, zmm, imm8" xed="VFPCLASSPS_MASKmskw_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fpclass_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k".
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0])
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VFPCLASSPS" form="k, xmm, imm8" xed="VFPCLASSPS_MASKmskw_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fpclass_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test packed single-precision (32-bit) floating-point elements in "a" for special categories specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).
+	[fpclass_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := CheckFPClass_FP32(a[i+31:i], imm8[7:0])
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VFPCLASSPS" form="k {k}, xmm, imm8" xed="VFPCLASSPS_MASKmskw_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fpclass_sd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k".
+	[fpclass_note]</description>
+	<operation>k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0])
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VFPCLASSSD" form="k, xmm, imm8" xed="VFPCLASSSD_MASKmskw_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fpclass_sd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test the lower double-precision (64-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set).
+	[fpclass_note]</description>
+	<operation>IF k1[0]
+	k[0] := CheckFPClass_FP64(a[63:0], imm8[7:0])
+ELSE
+	k[0] := 0
+FI
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VFPCLASSSD" form="k {k}, xmm, imm8" xed="VFPCLASSSD_MASKmskw_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fpclass_ss_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k".
+	[fpclass_note]</description>
+	<operation>k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0])
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VFPCLASSSS" form="k, xmm, imm8" xed="VFPCLASSSS_MASKmskw_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fpclass_ss_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Test the lower single-precision (32-bit) floating-point element in "a" for special categories specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set).
+	[fpclass_note]</description>
+	<operation>IF k1[0]
+	k[0] := CheckFPClass_FP32(a[31:0], imm8[7:0])
+ELSE
+	k[0] := 0
+FI
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VFPCLASSSS" form="k {k}, xmm, imm8" xed="VFPCLASSSS_MASKmskw_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_insertf32x8">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: dst[255:0] := b[255:0]
+1: dst[511:256] := b[255:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF32X8" form="zmm, zmm, ymm, imm8" xed="VINSERTF32X8_ZMMf32_MASKmskw_ZMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_insertf32x8">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF32X8" form="zmm {k}, zmm, ymm, imm8" xed="VINSERTF32X8_ZMMf32_MASKmskw_ZMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_insertf32x8">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF32X8" form="zmm {z}, zmm, ymm, imm8" xed="VINSERTF32X8_ZMMf32_MASKmskw_ZMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_insertf64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE imm8[0] OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF64X2" form="ymm, ymm, xmm, imm8" xed="VINSERTF64X2_YMMf64_MASKmskw_YMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_insertf64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF64X2" form="ymm {k}, ymm, xmm, imm8" xed="VINSERTF64X2_YMMf64_MASKmskw_YMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_insertf64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF64X2" form="ymm {z}, ymm, xmm, imm8" xed="VINSERTF64X2_YMMf64_MASKmskw_YMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_insertf64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE imm8[1:0] OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+2: dst[383:256] := b[127:0]
+3: dst[511:384] := b[127:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF64X2" form="zmm, zmm, xmm, imm8" xed="VINSERTF64X2_ZMMf64_MASKmskw_ZMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_insertf64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF64X2" form="zmm {k}, zmm, xmm, imm8" xed="VINSERTF64X2_ZMMf64_MASKmskw_ZMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_insertf64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF64X2" form="zmm {z}, zmm, xmm, imm8" xed="VINSERTF64X2_ZMMf64_MASKmskw_ZMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_inserti32x8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE imm8[0] OF
+0: dst[255:0] := b[255:0]
+1: dst[511:256] := b[255:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI32X8" form="zmm, zmm, ymm, imm8" xed="VINSERTI32X8_ZMMu32_MASKmskw_ZMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_inserti32x8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI32X8" form="zmm {k}, zmm, ymm, imm8" xed="VINSERTI32X8_ZMMu32_MASKmskw_ZMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_inserti32x8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 8 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI32X8" form="zmm {z}, zmm, ymm, imm8" xed="VINSERTI32X8_ZMMu32_MASKmskw_ZMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_inserti64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE imm8[0] OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI64X2" form="ymm, ymm, xmm, imm8" xed="VINSERTI64X2_YMMu64_MASKmskw_YMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_inserti64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI64X2" form="ymm {k}, ymm, xmm, imm8" xed="VINSERTI64X2_YMMu64_MASKmskw_YMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_inserti64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI64X2" form="ymm {z}, ymm, xmm, imm8" xed="VINSERTI64X2_YMMu64_MASKmskw_YMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_inserti64x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE imm8[1:0] OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+2: dst[383:256] := b[127:0]
+3: dst[511:384] := b[127:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI64X2" form="zmm, zmm, xmm, imm8" xed="VINSERTI64X2_ZMMu64_MASKmskw_ZMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_inserti64x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI64X2" form="zmm {k}, zmm, xmm, imm8" xed="VINSERTI64X2_ZMMu64_MASKmskw_ZMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_inserti64x2">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 2 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI64X2" form="zmm {z}, zmm, xmm, imm8" xed="VINSERTI64X2_ZMMu64_MASKmskw_ZMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VORPD" form="ymm {k}, ymm, ymm" xed="VORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VORPD" form="ymm {z}, ymm, ymm" xed="VORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VORPD" form="zmm {k}, zmm, zmm" xed="VORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VORPD" form="zmm {z}, zmm, zmm" xed="VORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VORPD" form="zmm, zmm, zmm" xed="VORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VORPD" form="xmm {k}, xmm, xmm" xed="VORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_or_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VORPD" form="xmm {z}, xmm, xmm" xed="VORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VORPS" form="ymm {k}, ymm, ymm" xed="VORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VORPS" form="ymm {z}, ymm, ymm" xed="VORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VORPS" form="zmm {k}, zmm, zmm" xed="VORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VORPS" form="zmm {z}, zmm, zmm" xed="VORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VORPS" form="zmm, zmm, zmm" xed="VORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VORPS" form="xmm {k}, xmm, xmm" xed="VORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_or_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VORPS" form="xmm {z}, xmm, xmm" xed="VORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movepi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF a[i+31]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPMOVD2M" form="k, ymm" xed="VPMOVD2M_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movepi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF a[i+31]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVD2M" form="k, zmm" xed="VPMOVD2M_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movepi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 32-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF a[i+31]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPMOVD2M" form="k, xmm" xed="VPMOVD2M_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movm_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := 0xFFFFFFFF
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVM2D" form="ymm" xed="VPMOVM2D_YMMu32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movm_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := 0xFFFFFFFF
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVM2D" form="zmm" xed="VPMOVM2D_ZMMu32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movm_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Set each packed 32-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := 0xFFFFFFFF
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVM2D" form="xmm" xed="VPMOVM2D_XMMu32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movm_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := 0xFFFFFFFFFFFFFFFF
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVM2Q" form="ymm" xed="VPMOVM2Q_YMMu64_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movm_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := 0xFFFFFFFFFFFFFFFF
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVM2Q" form="zmm" xed="VPMOVM2Q_ZMMu64_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movm_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Set each packed 64-bit integer in "dst" to all ones or all zeros based on the value of the corresponding bit in "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := 0xFFFFFFFFFFFFFFFF
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVM2Q" form="xmm" xed="VPMOVM2Q_XMMu64_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_movepi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF a[i+63]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPMOVQ2M" form="k, ymm" xed="VPMOVQ2M_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movepi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF a[i+63]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPMOVQ2M" form="k, zmm" xed="VPMOVQ2M_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_movepi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Set each bit of mask register "k" based on the most significant bit of the corresponding packed 64-bit integer in "a".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF a[i+63]
+		k[j] := 1
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPMOVQ2M" form="k, xmm" xed="VPMOVQ2M_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := a[i+63:i] * b[i+63:i]
+		dst[i+63:i] := tmp[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="ymm {k}, ymm, ymm" xed="VPMULLQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := a[i+63:i] * b[i+63:i]
+		dst[i+63:i] := tmp[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="ymm {z}, ymm, ymm" xed="VPMULLQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	tmp[127:0] := a[i+63:i] * b[i+63:i]
+	dst[i+63:i] := tmp[63:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="ymm, ymm, ymm" xed="VPMULLQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := a[i+63:i] * b[i+63:i]
+		dst[i+63:i] := tmp[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="zmm {k}, zmm, zmm" xed="VPMULLQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := a[i+63:i] * b[i+63:i]
+		dst[i+63:i] := tmp[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="zmm {z}, zmm, zmm" xed="VPMULLQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	tmp[127:0] := a[i+63:i] * b[i+63:i]
+	dst[i+63:i] := tmp[63:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="zmm, zmm, zmm" xed="VPMULLQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := a[i+63:i] * b[i+63:i]
+		dst[i+63:i] := tmp[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="xmm {k}, xmm, xmm" xed="VPMULLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := a[i+63:i] * b[i+63:i]
+		dst[i+63:i] := tmp[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="xmm {z}, xmm, xmm" xed="VPMULLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mullo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Multiply the packed 64-bit integers in "a" and "b", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	tmp[127:0] := a[i+63:i] * b[i+63:i]
+	dst[i+63:i] := tmp[63:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLQ" form="xmm, xmm, xmm" xed="VPMULLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="ymm {k}, ymm, ymm, imm8" xed="VRANGEPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="ymm {z}, ymm, ymm, imm8" xed="VRANGEPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="ymm, ymm, ymm, imm8" xed="VRANGEPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="zmm {k}, zmm, zmm, imm8" xed="VRANGEPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_range_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="zmm {k}, zmm, zmm {sae}, imm8" xed="VRANGEPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="zmm {z}, zmm, zmm, imm8" xed="VRANGEPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_range_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="zmm {z}, zmm, zmm {sae}, imm8" xed="VRANGEPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="zmm, zmm, zmm, imm8" xed="VRANGEPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_range_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="zmm, zmm, zmm {sae}, imm8" xed="VRANGEPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="xmm {k}, xmm, xmm, imm8" xed="VRANGEPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="xmm {z}, xmm, xmm, imm8" xed="VRANGEPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_range_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := RANGE(a[i+63:i], b[i+63:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGEPD" form="xmm, xmm, xmm, imm8" xed="VRANGEPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="ymm {k}, ymm, ymm, imm8" xed="VRANGEPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="ymm {z}, ymm, ymm, imm8" xed="VRANGEPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="ymm, ymm, ymm, imm8" xed="VRANGEPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="zmm {k}, zmm, zmm, imm8" xed="VRANGEPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_range_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="zmm {k}, zmm, zmm {sae}, imm8" xed="VRANGEPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="zmm {z}, zmm, zmm, imm8" xed="VRANGEPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_range_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="zmm {z}, zmm, zmm {sae}, imm8" xed="VRANGEPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="zmm, zmm, zmm, imm8" xed="VRANGEPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_range_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="zmm, zmm, zmm {sae}, imm8" xed="VRANGEPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="xmm {k}, xmm, xmm, imm8" xed="VRANGEPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="xmm {z}, xmm, xmm, imm8" xed="VRANGEPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_range_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := RANGE(a[i+31:i], b[i+31:i], imm8[1:0], imm8[3:2])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGEPS" form="xmm, xmm, xmm, imm8" xed="VRANGEPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_range_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESD" form="xmm {k}, xmm, xmm {sae}, imm8" xed="VRANGESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_range_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESD" form="xmm {k}, xmm, xmm, imm8" xed="VRANGESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_range_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESD" form="xmm {z}, xmm, xmm {sae}, imm8" xed="VRANGESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_range_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESD" form="xmm {z}, xmm, xmm, imm8" xed="VRANGESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_range_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[63:0], src2[63:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src1[63:0] : src2[63:0]
+	1: tmp[63:0] := (src1[63:0] &lt;= src2[63:0]) ? src2[63:0] : src1[63:0]
+	2: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src1[63:0] : src2[63:0]
+	3: tmp[63:0] := (ABS(src1[63:0]) &lt;= ABS(src2[63:0])) ? src2[63:0] : src1[63:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[63:0] := (src1[63] &lt;&lt; 63) OR (tmp[62:0])
+	1: dst[63:0] := tmp[63:0]
+	2: dst[63:0] := (0 &lt;&lt; 63) OR (tmp[62:0])
+	3: dst[63:0] := (1 &lt;&lt; 63) OR (tmp[62:0])
+	ESAC
+	
+	RETURN dst
+}
+dst[63:0] := RANGE(a[63:0], b[63:0], imm8[1:0], imm8[3:2])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESD" form="xmm, xmm, xmm {sae}, imm8" xed="VRANGESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_range_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESS" form="xmm {k}, xmm, xmm {sae}, imm8" xed="VRANGESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_range_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESS" form="xmm {k}, xmm, xmm, imm8" xed="VRANGESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_range_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESS" form="xmm {z}, xmm, xmm {sae}, imm8" xed="VRANGESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_range_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+IF k[0]
+	dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESS" form="xmm {z}, xmm, xmm, imm8" xed="VRANGESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_range_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Calculate the max, min, absolute max, or absolute min (depending on control in "imm8") for the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	imm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
+	imm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]</description>
+	<operation>
+DEFINE RANGE(src1[31:0], src2[31:0], opCtl[1:0], signSelCtl[1:0]) {
+	CASE opCtl[1:0] OF
+	0: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src1[31:0] : src2[31:0]
+	1: tmp[31:0] := (src1[31:0] &lt;= src2[31:0]) ? src2[31:0] : src1[31:0]
+	2: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src1[31:0] : src2[31:0]
+	3: tmp[31:0] := (ABS(src1[31:0]) &lt;= ABS(src2[31:0])) ? src2[31:0] : src1[31:0]
+	ESAC
+	
+	CASE signSelCtl[1:0] OF
+	0: dst[31:0] := (src1[31] &lt;&lt; 31) OR (tmp[30:0])
+	1: dst[31:0] := tmp[31:0]
+	2: dst[31:0] := (0 &lt;&lt; 31) OR (tmp[30:0])
+	3: dst[31:0] := (1 &lt;&lt; 31) OR (tmp[30:0])
+	ESAC
+	
+	RETURN dst
+}
+dst[31:0] := RANGE(a[31:0], b[31:0], imm8[1:0], imm8[3:2])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRANGESS" form="xmm, xmm, xmm {sae}, imm8" xed="VRANGESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="ymm {k}, ymm, imm8" xed="VREDUCEPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="ymm {z}, ymm, imm8" xed="VREDUCEPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="ymm, ymm, imm8" xed="VREDUCEPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="zmm {k}, zmm, imm8" xed="VREDUCEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_reduce_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="zmm {k}, zmm {sae}, imm8" xed="VREDUCEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="zmm {z}, zmm, imm8" xed="VREDUCEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_reduce_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="zmm {z}, zmm {sae}, imm8" xed="VREDUCEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="zmm, zmm, imm8" xed="VREDUCEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_reduce_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="zmm, zmm {sae}, imm8" xed="VREDUCEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="xmm {k}, xmm, imm8" xed="VREDUCEPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="xmm {z}, xmm, imm8" xed="VREDUCEPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_reduce_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed double-precision (64-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ReduceArgumentPD(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCEPD" form="xmm, xmm, imm8" xed="VREDUCEPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="ymm {k}, ymm, imm8" xed="VREDUCEPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="ymm {z}, ymm, imm8" xed="VREDUCEPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="ymm, ymm, imm8" xed="VREDUCEPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="zmm {k}, zmm, imm8" xed="VREDUCEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_reduce_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="zmm {k}, zmm {sae}, imm8" xed="VREDUCEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="zmm {z}, zmm, imm8" xed="VREDUCEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_reduce_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="zmm {z}, zmm {sae}, imm8" xed="VREDUCEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="zmm, zmm, imm8" xed="VREDUCEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_reduce_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="zmm, zmm {sae}, imm8" xed="VREDUCEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="xmm {k}, xmm, imm8" xed="VREDUCEPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="xmm {z}, xmm, imm8" xed="VREDUCEPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_reduce_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of packed single-precision (32-bit) floating-point elements in "a" by the number of bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ReduceArgumentPS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCEPS" form="xmm, xmm, imm8" xed="VREDUCEPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_reduce_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESD" form="xmm {k}, xmm, xmm, imm8" xed="VREDUCESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_reduce_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESD" form="xmm {k}, xmm, xmm {sae}, imm8" xed="VREDUCESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_reduce_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESD" form="xmm {z}, xmm, xmm, imm8" xed="VREDUCESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_reduce_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESD" form="xmm {z}, xmm, xmm {sae}, imm8" xed="VREDUCESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_reduce_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESD" form="xmm, xmm, xmm, imm8" xed="VREDUCESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_reduce_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of the lower double-precision (64-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPD(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	tmp[63:0] := src1[63:0] - tmp[63:0]
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := FP64(0.0)
+	FI
+	RETURN tmp[63:0]
+}
+dst[63:0] := ReduceArgumentPD(b[63:0], imm8[7:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESD" form="xmm, xmm, xmm {sae}, imm8" xed="VREDUCESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_reduce_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESS" form="xmm {k}, xmm, xmm, imm8" xed="VREDUCESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_reduce_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESS" form="xmm {k}, xmm, xmm {sae}, imm8" xed="VREDUCESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_reduce_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESS" form="xmm {z}, xmm, xmm, imm8" xed="VREDUCESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_reduce_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESS" form="xmm {z}, xmm, xmm {sae}, imm8" xed="VREDUCESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_reduce_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESS" form="xmm, xmm, xmm, imm8" xed="VREDUCESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_reduce_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Extract the reduced argument of the lower single-precision (32-bit) floating-point element in "b" by the number of bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE ReduceArgumentPS(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	tmp[31:0] := src1[31:0] - tmp[31:0]
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := FP32(0.0)
+	FI
+	RETURN tmp[31:0]
+}
+dst[31:0] := ReduceArgumentPS(b[31:0], imm8[7:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VREDUCESS" form="xmm, xmm, xmm {sae}, imm8" xed="VREDUCESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VXORPD" form="ymm {k}, ymm, ymm" xed="VXORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VXORPD" form="ymm {z}, ymm, ymm" xed="VXORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VXORPD" form="zmm {k}, zmm, zmm" xed="VXORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VXORPD" form="zmm {z}, zmm, zmm" xed="VXORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VXORPD" form="zmm, zmm, zmm" xed="VXORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VXORPD" form="xmm {k}, xmm, xmm" xed="VXORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VXORPD" form="xmm {z}, xmm, xmm" xed="VXORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VXORPS" form="ymm {k}, ymm, ymm" xed="VXORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VXORPS" form="ymm {z}, ymm, ymm" xed="VXORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VXORPS" form="zmm {k}, zmm, zmm" xed="VXORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VXORPS" form="zmm {z}, zmm, zmm" xed="VXORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VXORPS" form="zmm, zmm, zmm" xed="VXORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VXORPS" form="xmm {k}, xmm, xmm" xed="VXORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VXORPS" form="xmm {z}, xmm, xmm" xed="VXORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kadd_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Add 8-bit masks in "a" and "b", and store the result in "k".</description>
+	<operation>
+k[7:0] := a[7:0] + b[7:0]
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KADDB" form="k, k, k" xed="KADDB_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kadd_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Add 16-bit masks in "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] + b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KADDW" form="k, k, k" xed="KADDW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kand_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 8-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[7:0] := a[7:0] AND b[7:0]
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KANDB" form="k, k, k" xed="KANDB_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kandn_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 8-bit mask "a" and then AND with "b", and store the result in "k".</description>
+	<operation>
+k[7:0] := (NOT a[7:0]) AND b[7:0]
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KANDNB" form="k, k, k" xed="KANDNB_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_knot_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<description>Compute the bitwise NOT of 8-bit mask "a", and store the result in "k".</description>
+	<operation>
+k[7:0] := NOT a[7:0]
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KNOTB" form="k, k" xed="KNOTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kor_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 8-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[7:0] := a[7:0] OR b[7:0]
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KORB" form="k, k, k" xed="KORB_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxnor_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XNOR of 8-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[7:0] := NOT (a[7:0] XOR b[7:0])
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KXNORB" form="k, k, k" xed="KXNORB_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxor_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XOR of 8-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[7:0] := a[7:0] XOR b[7:0]
+k[MAX:8] := 0
+	</operation>
+	<instruction name="KXORB" form="k, k, k" xed="KXORB_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftli_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 8-bit mask "a" left by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 7
+	k[7:0] := a[7:0] &lt;&lt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTLB" form="k, k, imm8" xed="KSHIFTLB_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftri_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 8-bit mask "a" right by "count" while shifting in zeros, and store the least significant 8 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 7
+	k[7:0] := a[7:0] &gt;&gt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTRB" form="k, k, imm8" xed="KSHIFTRB_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_load_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Load</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8*" varname="mem_addr" etype="MASK" memwidth="8"/>
+	<description>Load 8-bit mask from memory into "k".</description>
+	<operation>
+k[7:0] := MEM[mem_addr+7:mem_addr]
+	</operation>
+	<instruction name="KMOVB" form="k, m8" xed="KMOVB_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_store_mask8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__mmask8*" varname="mem_addr" etype="MASK" memwidth="8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<description>Store 8-bit mask from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+7:mem_addr] := a[7:0]
+	</operation>
+	<instruction name="KMOVB" form="m8, k" xed="KMOVB_MEMu8_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortest_mask8_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="all_ones" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones".</description>
+	<operation>
+tmp[7:0] := a[7:0] OR b[7:0]
+IF tmp[7:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+IF tmp[7:0] == 0xFF
+	MEM[all_ones+7:all_ones] := 1
+ELSE
+	MEM[all_ones+7:all_ones] := 0
+FI
+	</operation>
+	<instruction name="KORTESTB" form="k, k" xed="KORTESTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestz_mask8_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[7:0] := a[7:0] OR b[7:0]
+IF tmp[7:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTB" form="k, k" xed="KORTESTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestc_mask8_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 8-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[7:0] := a[7:0] OR b[7:0]
+IF tmp[7:0] == 0xFF
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTB" form="k, k" xed="KORTESTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktest_mask8_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="and_not" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b"; if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not".</description>
+	<operation>
+tmp1[7:0] := a[7:0] AND b[7:0]
+IF tmp1[7:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+tmp2[7:0] := (NOT a[7:0]) AND b[7:0]
+IF tmp2[7:0] == 0x0
+	MEM[and_not+7:and_not] := 1
+ELSE
+	MEM[and_not+7:and_not] := 0
+FI
+	</operation>
+	<instruction name="KTESTB" form="k, k" xed="KTESTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestz_mask8_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 8-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[7:0] := a[7:0] AND b[7:0]
+IF tmp[7:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTB" form="k, k" xed="KTESTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestc_mask8_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<parameter type="__mmask8" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 8-bit mask "a" and then AND with "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[7:0] := (NOT a[7:0]) AND b[7:0]
+IF tmp[7:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTB" form="k, k" xed="KTESTB_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktest_mask16_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="and_not" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". Compute the bitwise NOT of "a" and then AND with "b"; if the result is all zeros, store 1 in "and_not", otherwise store 0 in "and_not".</description>
+	<operation>
+tmp1[15:0] := a[15:0] AND b[15:0]
+IF tmp1[15:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+tmp2[15:0] := (NOT a[15:0]) AND b[15:0]
+IF tmp2[15:0] == 0x0
+	MEM[and_not+7:and_not] := 1
+ELSE
+	MEM[and_not+7:and_not] := 0
+FI
+	</operation>
+	<instruction name="KTESTW" form="k, k" xed="KTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestz_mask16_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 16-bit masks "a" and "b", and if the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[15:0] := a[15:0] AND b[15:0]
+IF tmp[15:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTW" form="k, k" xed="KTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_ktestc_mask16_u8">
+	<type>Mask</type>
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit mask "a" and then AND with "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[15:0] := (NOT a[15:0]) AND b[15:0]
+IF tmp[15:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KTESTW" form="k, k" xed="KTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtmask8_u32">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="a" etype="MASK"/>
+	<description>Convert 8-bit mask "a" into an integer value, and store the result in "dst".</description>
+	<operation>
+dst := ZeroExtend32(a[7:0])
+	</operation>
+	<instruction name="KMOVB" form="r32, k" xed="KMOVB_GPR32u32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtu32_mask8">
+	<CPUID>AVX512DQ</CPUID>
+	<category>Mask</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="unsigned int" varname="a" etype="UI8"/>
+	<description>Convert integer value "a" into an 8-bit mask, and store the result in "k".</description>
+	<operation>
+k := a[7:0]
+	</operation>
+	<instruction name="KMOVB" form="k, r32" xed="KMOVB_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_exp2a23_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-23. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PS" form="zmm, zmm {sae}" xed="VEXP2PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_exp2a23_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-23.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PS" form="zmm, zmm" xed="VEXP2PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_exp2a23_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PS" form="zmm {k}, zmm {sae}" xed="VEXP2PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_exp2a23_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PS" form="zmm {k}, zmm" xed="VEXP2PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_exp2a23_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PS" form="zmm {z}, zmm {sae}" xed="VEXP2PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_exp2a23_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PS" form="zmm {z}, zmm" xed="VEXP2PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_exp2a23_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-23. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(2.0, a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PD" form="zmm, zmm {sae}" xed="VEXP2PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_exp2a23_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-23.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(2.0, a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PD" form="zmm, zmm" xed="VEXP2PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_exp2a23_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(2.0, a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PD" form="zmm {k}, zmm {sae}" xed="VEXP2PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_exp2a23_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(2.0, a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PD" form="zmm {k}, zmm" xed="VEXP2PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_exp2a23_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(2.0, a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PD" form="zmm {z}, zmm {sae}" xed="VEXP2PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_exp2a23_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-23.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(2.0, a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP2PD" form="zmm {z}, zmm" xed="VEXP2PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp28_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+dst[63:0] := (1.0 / b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SD" form="xmm, xmm, xmm {sae}" xed="VRCP28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp28_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+dst[63:0] := (1.0 / b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SD" form="xmm, xmm, xmm" xed="VRCP28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp28_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SD" form="xmm {k}, xmm, xmm {sae}" xed="VRCP28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp28_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SD" form="xmm {k}, xmm, xmm" xed="VRCP28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp28_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SD" form="xmm {z}, xmm, xmm {sae}" xed="VRCP28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp28_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SD" form="xmm {z}, xmm, xmm" xed="VRCP28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp28_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+dst[31:0] := (1.0 / b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SS" form="xmm, xmm, xmm {sae}" xed="VRCP28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp28_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+dst[31:0] := (1.0 / b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SS" form="xmm, xmm, xmm" xed="VRCP28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp28_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SS" form="xmm {k}, xmm, xmm {sae}" xed="VRCP28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp28_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SS" form="xmm {k}, xmm, xmm" xed="VRCP28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp28_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SS" form="xmm {z}, xmm, xmm {sae}" xed="VRCP28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp28_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP28SS" form="xmm {z}, xmm, xmm" xed="VRCP28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rcp28_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PS" form="zmm, zmm {sae}" xed="VRCP28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rcp28_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PS" form="zmm, zmm" xed="VRCP28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rcp28_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PS" form="zmm {k}, zmm {sae}" xed="VRCP28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rcp28_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PS" form="zmm {k}, zmm" xed="VRCP28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rcp28_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PS" form="zmm {z}, zmm {sae}" xed="VRCP28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rcp28_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PS" form="zmm {z}, zmm" xed="VRCP28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rcp28_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / a[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PD" form="zmm, zmm {sae}" xed="VRCP28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rcp28_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / a[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PD" form="zmm, zmm" xed="VRCP28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rcp28_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PD" form="zmm {k}, zmm {sae}" xed="VRCP28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rcp28_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PD" form="zmm {k}, zmm" xed="VRCP28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rcp28_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PD" form="zmm {z}, zmm {sae}" xed="VRCP28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rcp28_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRCP28PD" form="zmm {z}, zmm" xed="VRCP28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rsqrt28_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+dst[63:0] := (1.0 / SQRT(b[63:0]))
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SD" form="xmm, xmm, xmm {sae}" xed="VRSQRT28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rsqrt28_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+dst[63:0] := (1.0 / SQRT(b[63:0]))
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SD" form="xmm, xmm, xmm" xed="VRSQRT28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt28_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / SQRT(b[63:0]))
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SD" form="xmm {k}, xmm, xmm {sae}" xed="VRSQRT28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt28_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / SQRT(b[63:0]))
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SD" form="xmm {k}, xmm, xmm" xed="VRSQRT28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt28_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / SQRT(b[63:0]))
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SD" form="xmm {z}, xmm, xmm {sae}" xed="VRSQRT28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt28_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / SQRT(b[63:0]))
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SD" form="xmm {z}, xmm, xmm" xed="VRSQRT28SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rsqrt28_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+dst[31:0] := (1.0 / SQRT(b[31:0]))
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SS" form="xmm, xmm, xmm {sae}" xed="VRSQRT28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rsqrt28_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+dst[31:0] := (1.0 / SQRT(b[31:0]))
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SS" form="xmm, xmm, xmm" xed="VRSQRT28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt28_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / SQRT(b[31:0]))
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SS" form="xmm {k}, xmm, xmm {sae}" xed="VRSQRT28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt28_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / SQRT(b[31:0]))
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SS" form="xmm {k}, xmm, xmm" xed="VRSQRT28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt28_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / SQRT(b[31:0]))
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SS" form="xmm {z}, xmm, xmm {sae}" xed="VRSQRT28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt28_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / SQRT(b[31:0]))
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT28SS" form="xmm {z}, xmm, xmm" xed="VRSQRT28SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rsqrt28_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PS" form="zmm, zmm {sae}" xed="VRSQRT28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rsqrt28_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PS" form="zmm, zmm" xed="VRSQRT28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rsqrt28_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PS" form="zmm {k}, zmm {sae}" xed="VRSQRT28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rsqrt28_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PS" form="zmm {k}, zmm" xed="VRSQRT28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rsqrt28_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PS" form="zmm {z}, zmm {sae}" xed="VRSQRT28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rsqrt28_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PS" form="zmm {z}, zmm" xed="VRSQRT28PS_ZMMf32_MASKmskw_ZMMf32_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rsqrt28_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PD" form="zmm, zmm {sae}" xed="VRSQRT28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rsqrt28_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PD" form="zmm, zmm" xed="VRSQRT28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rsqrt28_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PD" form="zmm {k}, zmm {sae}" xed="VRSQRT28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rsqrt28_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PD" form="zmm {k}, zmm" xed="VRSQRT28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rsqrt28_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28. [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PD" form="zmm {z}, zmm {sae}" xed="VRSQRT28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rsqrt28_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512ER</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-28.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VRSQRT28PD" form="zmm {z}, zmm" xed="VRSQRT28PD_ZMMf64_MASKmskw_ZMMf64_AVX512ER"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDPD" form="ymm {k}, ymm, ymm" xed="VADDPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDPD" form="ymm {z}, ymm, ymm" xed="VADDPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDPD" form="xmm {k}, xmm, xmm" xed="VADDPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDPD" form="xmm {z}, xmm, xmm" xed="VADDPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDPS" form="ymm {k}, ymm, ymm" xed="VADDPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VADDPS" form="ymm {z}, ymm, ymm" xed="VADDPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDPS" form="xmm {k}, xmm, xmm" xed="VADDPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDPS" form="xmm {z}, xmm, xmm" xed="VADDPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Concatenate "a" and "b" into a 64-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst".</description>
+	<operation>
+temp[511:256] := a[255:0]
+temp[255:0] := b[255:0]
+temp[511:0] := temp[511:0] &gt;&gt; (32*imm8[2:0])
+dst[255:0] := temp[255:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VALIGND" form="ymm, ymm, ymm, imm8" xed="VALIGND_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Concatenate "a" and "b" into a 64-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+temp[511:256] := a[255:0]
+temp[255:0] := b[255:0]
+temp[511:0] := temp[511:0] &gt;&gt; (32*imm8[2:0])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := temp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VALIGND" form="ymm {k}, ymm, ymm, imm8" xed="VALIGND_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Concatenate "a" and "b" into a 64-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 32 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+temp[511:256] := a[255:0]
+temp[255:0] := b[255:0]
+temp[511:0] := temp[511:0] &gt;&gt; (32*imm8[2:0])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := temp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VALIGND" form="ymm {z}, ymm, ymm, imm8" xed="VALIGND_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Concatenate "a" and "b" into a 32-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst".</description>
+	<operation>
+temp[255:128] := a[127:0]
+temp[127:0] := b[127:0]
+temp[255:0] := temp[255:0] &gt;&gt; (32*imm8[1:0])
+dst[127:0] := temp[127:0]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VALIGND" form="xmm, xmm, xmm, imm8" xed="VALIGND_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Concatenate "a" and "b" into a 32-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+temp[255:128] := a[127:0]
+temp[127:0] := b[127:0]
+temp[255:0] := temp[255:0] &gt;&gt; (32*imm8[1:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := temp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VALIGND" form="xmm {k}, xmm, xmm, imm8" xed="VALIGND_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Concatenate "a" and "b" into a 32-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 16 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+temp[255:128] := a[127:0]
+temp[127:0] := b[127:0]
+temp[255:0] := temp[255:0] &gt;&gt; (32*imm8[1:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := temp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VALIGND" form="xmm {z}, xmm, xmm, imm8" xed="VALIGND_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Concatenate "a" and "b" into a 64-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst".</description>
+	<operation>
+temp[511:256] := a[255:0]
+temp[255:0] := b[255:0]
+temp[511:0] := temp[511:0] &gt;&gt; (64*imm8[1:0])
+dst[255:0] := temp[255:0]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="ymm, ymm, ymm, imm8" xed="VALIGNQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Concatenate "a" and "b" into a 64-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+temp[511:256] := a[255:0]
+temp[255:0] := b[255:0]
+temp[511:0] := temp[511:0] &gt;&gt; (64*imm8[1:0])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := temp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="ymm {k}, ymm, ymm, imm8" xed="VALIGNQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Concatenate "a" and "b" into a 64-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 32 bytes (4 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+temp[511:256] := a[255:0]
+temp[255:0] := b[255:0]
+temp[511:0] := temp[511:0] &gt;&gt; (64*imm8[1:0])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := temp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="ymm {z}, ymm, ymm, imm8" xed="VALIGNQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Concatenate "a" and "b" into a 32-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst".</description>
+	<operation>
+temp[255:128] := a[127:0]
+temp[127:0] := b[127:0]
+temp[255:0] := temp[255:0] &gt;&gt; (64*imm8[0])
+dst[127:0] := temp[127:0]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="xmm, xmm, xmm, imm8" xed="VALIGNQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Concatenate "a" and "b" into a 32-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+temp[255:128] := a[127:0]
+temp[127:0] := b[127:0]
+temp[255:0] := temp[255:0] &gt;&gt; (64*imm8[0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := temp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="xmm {k}, xmm, xmm, imm8" xed="VALIGNQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Concatenate "a" and "b" into a 32-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 16 bytes (2 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+temp[255:128] := a[127:0]
+temp[127:0] := b[127:0]
+temp[255:0] := temp[255:0] &gt;&gt; (64*imm8[0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := temp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="xmm {z}, xmm, xmm, imm8" xed="VALIGNQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_blend_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBLENDMPD" form="ymm {k}, ymm, ymm" xed="VBLENDMPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_blend_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBLENDMPD" form="xmm {k}, xmm, xmm" xed="VBLENDMPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_blend_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBLENDMPS" form="ymm {k}, ymm, ymm" xed="VBLENDMPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_blend_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBLENDMPS" form="xmm {k}, xmm, xmm" xed="VBLENDMPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_broadcast_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 4)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X4" form="ymm, m128" xed="VBROADCASTF32X4_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_mask_broadcast_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X4" form="ymm {k}, m128" xed="VBROADCASTF32X4_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_maskz_broadcast_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X4" form="ymm {z}, m128" xed="VBROADCASTF32X4_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_broadcast_i32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 4)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X4" form="ymm, m128" xed="VBROADCASTI32X4_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_mask_broadcast_i32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X4" form="ymm {k}, m128" xed="VBROADCASTI32X4_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm256_maskz_broadcast_i32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X4" form="ymm {z}, m128" xed="VBROADCASTI32X4_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="ymm {k}, xmm" xed="VBROADCASTSD_YMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="ymm {z}, xmm" xed="VBROADCASTSD_YMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="ymm {k}, xmm" xed="VBROADCASTSS_YMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="ymm {z}, xmm" xed="VBROADCASTSS_YMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="xmm {k}, xmm" xed="VBROADCASTSS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="xmm {z}, xmm" xed="VBROADCASTSS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, ymm, ymm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, ymm, ymm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, xmm, xmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, xmm, xmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, ymm, ymm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, ymm, ymm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, xmm, xmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, xmm, xmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compress_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := src[255:m]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCOMPRESSPD" form="ymm {k}, ymm" xed="VCOMPRESSPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compressstoreu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 64
+m := base_addr
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		MEM[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VCOMPRESSPD" form="m256 {k}, ymm" xed="VCOMPRESSPD_MEMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_compress_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCOMPRESSPD" form="ymm {z}, ymm" xed="VCOMPRESSPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compress_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := src[127:m]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCOMPRESSPD" form="xmm {k}, xmm" xed="VCOMPRESSPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compressstoreu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 64
+m := base_addr
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		MEM[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VCOMPRESSPD" form="m128 {k}, xmm" xed="VCOMPRESSPD_MEMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_compress_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCOMPRESSPD" form="xmm {z}, xmm" xed="VCOMPRESSPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compress_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := src[255:m]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCOMPRESSPS" form="ymm {k}, ymm" xed="VCOMPRESSPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compressstoreu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 32
+m := base_addr
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		MEM[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VCOMPRESSPS" form="m256 {k}, ymm" xed="VCOMPRESSPS_MEMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_compress_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCOMPRESSPS" form="ymm {z}, ymm" xed="VCOMPRESSPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compress_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := src[127:m]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCOMPRESSPS" form="xmm {k}, xmm" xed="VCOMPRESSPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compressstoreu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 32
+m := base_addr
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		MEM[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VCOMPRESSPS" form="m128 {k}, xmm" xed="VCOMPRESSPS_MEMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_compress_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCOMPRESSPS" form="xmm {z}, xmm" xed="VCOMPRESSPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF k[j]
+		dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+	ELSE
+		dst[m+63:m] := src[m+63:m]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="ymm {k}, xmm" xed="VCVTDQ2PD_YMMf64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF k[j]
+		dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+	ELSE
+		dst[m+63:m] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="ymm {z}, xmm" xed="VCVTDQ2PD_YMMf64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF k[j]
+		dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+	ELSE
+		dst[m+63:m] := src[m+63:m]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="xmm {k}, xmm" xed="VCVTDQ2PD_XMMf64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF k[j]
+		dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+	ELSE
+		dst[m+63:m] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="xmm {z}, xmm" xed="VCVTDQ2PD_XMMf64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="ymm {k}, ymm" xed="VCVTDQ2PS_YMMf32_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="ymm {z}, ymm" xed="VCVTDQ2PS_YMMf32_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="xmm {k}, xmm" xed="VCVTDQ2PS_XMMf32_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="xmm {z}, xmm" xed="VCVTDQ2PS_XMMf32_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="xmm {k}, ymm" xed="VCVTPD2DQ_XMMi32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="xmm {z}, ymm" xed="VCVTPD2DQ_XMMi32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="xmm {k}, xmm" xed="VCVTPD2DQ_XMMi32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="xmm {z}, xmm" xed="VCVTPD2DQ_XMMi32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="xmm {k}, ymm" xed="VCVTPD2PS_XMMf32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="xmm {z}, ymm" xed="VCVTPD2PS_XMMf32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="xmm {k}, xmm" xed="VCVTPD2PS_XMMf32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="xmm {z}, xmm" xed="VCVTPD2PS_XMMf32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="xmm, ymm" xed="VCVTPD2UDQ_XMMu32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="xmm {k}, ymm" xed="VCVTPD2UDQ_XMMu32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="xmm {z}, ymm" xed="VCVTPD2UDQ_XMMu32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="xmm, xmm" xed="VCVTPD2UDQ_XMMu32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="xmm {k}, xmm" xed="VCVTPD2UDQ_XMMu32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="xmm {z}, xmm" xed="VCVTPD2UDQ_XMMu32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="ymm {k}, xmm" xed="VCVTPH2PS_YMMf32_MASKmskw_XMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="ymm {z}, xmm" xed="VCVTPH2PS_YMMf32_MASKmskw_XMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="xmm {k}, xmm" xed="VCVTPH2PS_XMMf32_MASKmskw_XMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="xmm {z}, xmm" xed="VCVTPH2PS_XMMf32_MASKmskw_XMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="ymm {k}, ymm" xed="VCVTPS2DQ_YMMi32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="ymm {z}, ymm" xed="VCVTPS2DQ_YMMi32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="xmm {k}, xmm" xed="VCVTPS2DQ_XMMi32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="xmm {z}, xmm" xed="VCVTPS2DQ_XMMi32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {k}, ymm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {k}, ymm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {z}, ymm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {z}, ymm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {k}, xmm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {k}, xmm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {z}, xmm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_ROUND_MODE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm {z}, xmm, imm8" xed="VCVTPS2PH_XMMf16_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="ymm, ymm" xed="VCVTPS2UDQ_YMMu32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="ymm {k}, ymm" xed="VCVTPS2UDQ_YMMu32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="ymm {z}, ymm" xed="VCVTPS2UDQ_YMMu32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="xmm, xmm" xed="VCVTPS2UDQ_XMMu32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="xmm {k}, xmm" xed="VCVTPS2UDQ_XMMu32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="xmm {z}, xmm" xed="VCVTPS2UDQ_XMMu32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="xmm {k}, ymm" xed="VCVTTPD2DQ_XMMi32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="xmm {z}, ymm" xed="VCVTTPD2DQ_XMMi32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="xmm {k}, xmm" xed="VCVTTPD2DQ_XMMi32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="xmm {z}, xmm" xed="VCVTTPD2DQ_XMMi32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="xmm, ymm" xed="VCVTTPD2UDQ_XMMu32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="xmm {k}, ymm" xed="VCVTTPD2UDQ_XMMu32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="xmm {z}, ymm" xed="VCVTTPD2UDQ_XMMu32_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="xmm, xmm" xed="VCVTTPD2UDQ_XMMu32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="xmm {k}, xmm" xed="VCVTTPD2UDQ_XMMu32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="xmm {z}, xmm" xed="VCVTTPD2UDQ_XMMu32_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="ymm {k}, ymm" xed="VCVTTPS2DQ_YMMi32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="ymm {z}, ymm" xed="VCVTTPS2DQ_YMMi32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="xmm {k}, xmm" xed="VCVTTPS2DQ_XMMi32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="xmm {z}, xmm" xed="VCVTTPS2DQ_XMMi32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="ymm, ymm" xed="VCVTTPS2UDQ_YMMu32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="ymm {k}, ymm" xed="VCVTTPS2UDQ_YMMu32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="ymm {z}, ymm" xed="VCVTTPS2UDQ_YMMu32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="xmm, xmm" xed="VCVTTPS2UDQ_XMMu32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="xmm {k}, xmm" xed="VCVTTPS2UDQ_XMMu32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="xmm {z}, xmm" xed="VCVTTPS2UDQ_XMMu32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="ymm, xmm" xed="VCVTUDQ2PD_YMMf64_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="ymm {k}, xmm" xed="VCVTUDQ2PD_YMMf64_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="ymm {z}, xmm" xed="VCVTUDQ2PD_YMMf64_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="xmm, xmm" xed="VCVTUDQ2PD_XMMf64_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="xmm {k}, xmm" xed="VCVTUDQ2PD_XMMf64_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="xmm {z}, xmm" xed="VCVTUDQ2PD_XMMf64_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDIVPD" form="ymm {k}, ymm, ymm" xed="VDIVPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDIVPD" form="ymm {z}, ymm, ymm" xed="VDIVPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVPD" form="xmm {k}, xmm, xmm" xed="VDIVPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVPD" form="xmm {z}, xmm, xmm" xed="VDIVPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDIVPS" form="ymm {k}, ymm, ymm" xed="VDIVPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDIVPS" form="ymm {z}, ymm, ymm" xed="VDIVPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVPS" form="xmm {k}, xmm, xmm" xed="VDIVPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVPS" form="xmm {z}, xmm, xmm" xed="VDIVPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="ymm {k}, ymm" xed="VEXPANDPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expandloadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="ymm {k}, m256" xed="VEXPANDPD_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="ymm {z}, ymm" xed="VEXPANDPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expandloadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="ymm {z}, m256" xed="VEXPANDPD_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="xmm {k}, xmm" xed="VEXPANDPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expandloadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="xmm {k}, m128" xed="VEXPANDPD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="xmm {z}, xmm" xed="VEXPANDPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expandloadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="xmm {z}, m128" xed="VEXPANDPD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="ymm {k}, ymm" xed="VEXPANDPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expandloadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="ymm {k}, m256" xed="VEXPANDPS_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="ymm {z}, ymm" xed="VEXPANDPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expandloadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="ymm {z}, m256" xed="VEXPANDPS_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="xmm {k}, xmm" xed="VEXPANDPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expandloadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="xmm {k}, m128" xed="VEXPANDPS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="xmm {z}, xmm" xed="VEXPANDPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expandloadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="xmm {z}, m128" xed="VEXPANDPS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_extractf32x4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X4" form="xmm, ymm, imm8" xed="VEXTRACTF32X4_XMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_extractf32x4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X4" form="xmm {k}, ymm, imm8" xed="VEXTRACTF32X4_XMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_extractf32x4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X4" form="xmm {z}, ymm, imm8" xed="VEXTRACTF32X4_XMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_extracti32x4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X4" form="xmm, ymm, imm8" xed="VEXTRACTI32X4_XMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_extracti32x4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X4" form="xmm {k}, ymm, imm8" xed="VEXTRACTI32X4_XMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_extracti32x4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X4" form="xmm {z}, ymm, imm8" xed="VEXTRACTI32X4_XMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN: j := 0
+	SNAN_TOKEN: j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="ymm, ymm, ymm, imm8" xed="VFIXUPIMMPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="ymm {k}, ymm, ymm, imm8" xed="VFIXUPIMMPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="ymm {z}, ymm, ymm, imm8" xed="VFIXUPIMMPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="xmm, xmm, xmm, imm8" xed="VFIXUPIMMPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="xmm {k}, xmm, xmm, imm8" xed="VFIXUPIMMPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="xmm {z}, xmm, xmm, imm8" xed="VFIXUPIMMPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="ymm, ymm, ymm, imm8" xed="VFIXUPIMMPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="ymm {k}, ymm, ymm, imm8" xed="VFIXUPIMMPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="ymm {z}, ymm, ymm, imm8" xed="VFIXUPIMMPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="xmm, xmm, xmm, imm8" xed="VFIXUPIMMPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="xmm {k}, xmm, xmm, imm8" xed="VFIXUPIMMPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="xmm {z}, xmm, xmm, imm8" xed="VFIXUPIMMPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="ymm {k}, ymm, ymm" xed="VFMADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="ymm {k}, ymm, ymm" xed="VFMADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="ymm {k}, ymm, ymm" xed="VFMADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="ymm {k}, ymm, ymm" xed="VFMADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="ymm {k}, ymm, ymm" xed="VFMADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="ymm {k}, ymm, ymm" xed="VFMADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="ymm {z}, ymm, ymm" xed="VFMADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="ymm {z}, ymm, ymm" xed="VFMADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="ymm {z}, ymm, ymm" xed="VFMADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="xmm {k}, xmm, xmm" xed="VFMADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="xmm {k}, xmm, xmm" xed="VFMADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="xmm {k}, xmm, xmm" xed="VFMADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="xmm {k}, xmm, xmm" xed="VFMADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="xmm {k}, xmm, xmm" xed="VFMADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="xmm {k}, xmm, xmm" xed="VFMADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="xmm {z}, xmm, xmm" xed="VFMADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="xmm {z}, xmm, xmm" xed="VFMADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="xmm {z}, xmm, xmm" xed="VFMADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="ymm {k}, ymm, ymm" xed="VFMADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="ymm {k}, ymm, ymm" xed="VFMADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="ymm {k}, ymm, ymm" xed="VFMADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="ymm {k}, ymm, ymm" xed="VFMADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="ymm {k}, ymm, ymm" xed="VFMADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="ymm {k}, ymm, ymm" xed="VFMADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="ymm {z}, ymm, ymm" xed="VFMADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="ymm {z}, ymm, ymm" xed="VFMADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="ymm {z}, ymm, ymm" xed="VFMADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="xmm {k}, xmm, xmm" xed="VFMADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="xmm {k}, xmm, xmm" xed="VFMADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="xmm {k}, xmm, xmm" xed="VFMADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="xmm {k}, xmm, xmm" xed="VFMADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="xmm {k}, xmm, xmm" xed="VFMADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="xmm {k}, xmm, xmm" xed="VFMADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="xmm {z}, xmm, xmm" xed="VFMADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="xmm {z}, xmm, xmm" xed="VFMADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="xmm {z}, xmm, xmm" xed="VFMADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="ymm {k}, ymm, ymm" xed="VFMADDSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="ymm {k}, ymm, ymm" xed="VFMADDSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="ymm {k}, ymm, ymm" xed="VFMADDSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="ymm {k}, ymm, ymm" xed="VFMADDSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="ymm {k}, ymm, ymm" xed="VFMADDSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="ymm {k}, ymm, ymm" xed="VFMADDSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="ymm {z}, ymm, ymm" xed="VFMADDSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="ymm {z}, ymm, ymm" xed="VFMADDSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="ymm {z}, ymm, ymm" xed="VFMADDSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="xmm {k}, xmm, xmm" xed="VFMADDSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="xmm {k}, xmm, xmm" xed="VFMADDSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="xmm {k}, xmm, xmm" xed="VFMADDSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="xmm {k}, xmm, xmm" xed="VFMADDSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="xmm {k}, xmm, xmm" xed="VFMADDSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="xmm {k}, xmm, xmm" xed="VFMADDSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="xmm {z}, xmm, xmm" xed="VFMADDSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="xmm {z}, xmm, xmm" xed="VFMADDSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="xmm {z}, xmm, xmm" xed="VFMADDSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="ymm {k}, ymm, ymm" xed="VFMADDSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="ymm {k}, ymm, ymm" xed="VFMADDSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="ymm {k}, ymm, ymm" xed="VFMADDSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="ymm {k}, ymm, ymm" xed="VFMADDSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="ymm {k}, ymm, ymm" xed="VFMADDSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="ymm {k}, ymm, ymm" xed="VFMADDSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="ymm {z}, ymm, ymm" xed="VFMADDSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="ymm {z}, ymm, ymm" xed="VFMADDSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="ymm {z}, ymm, ymm" xed="VFMADDSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="xmm {k}, xmm, xmm" xed="VFMADDSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="xmm {k}, xmm, xmm" xed="VFMADDSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="xmm {k}, xmm, xmm" xed="VFMADDSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="xmm {k}, xmm, xmm" xed="VFMADDSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="xmm {k}, xmm, xmm" xed="VFMADDSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="xmm {k}, xmm, xmm" xed="VFMADDSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="xmm {z}, xmm, xmm" xed="VFMADDSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="xmm {z}, xmm, xmm" xed="VFMADDSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="xmm {z}, xmm, xmm" xed="VFMADDSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="ymm {k}, ymm, ymm" xed="VFMSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="ymm {k}, ymm, ymm" xed="VFMSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="ymm {k}, ymm, ymm" xed="VFMSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="ymm {k}, ymm, ymm" xed="VFMSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="ymm {k}, ymm, ymm" xed="VFMSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="ymm {k}, ymm, ymm" xed="VFMSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="ymm {z}, ymm, ymm" xed="VFMSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="ymm {z}, ymm, ymm" xed="VFMSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="ymm {z}, ymm, ymm" xed="VFMSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="xmm {k}, xmm, xmm" xed="VFMSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="xmm {k}, xmm, xmm" xed="VFMSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="xmm {k}, xmm, xmm" xed="VFMSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="xmm {k}, xmm, xmm" xed="VFMSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="xmm {k}, xmm, xmm" xed="VFMSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="xmm {k}, xmm, xmm" xed="VFMSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="xmm {z}, xmm, xmm" xed="VFMSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="xmm {z}, xmm, xmm" xed="VFMSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="xmm {z}, xmm, xmm" xed="VFMSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="ymm {k}, ymm, ymm" xed="VFMSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="ymm {k}, ymm, ymm" xed="VFMSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="ymm {k}, ymm, ymm" xed="VFMSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="ymm {k}, ymm, ymm" xed="VFMSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="ymm {k}, ymm, ymm" xed="VFMSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="ymm {k}, ymm, ymm" xed="VFMSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="ymm {z}, ymm, ymm" xed="VFMSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="ymm {z}, ymm, ymm" xed="VFMSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="ymm {z}, ymm, ymm" xed="VFMSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="xmm {k}, xmm, xmm" xed="VFMSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="xmm {k}, xmm, xmm" xed="VFMSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="xmm {k}, xmm, xmm" xed="VFMSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="xmm {k}, xmm, xmm" xed="VFMSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="xmm {k}, xmm, xmm" xed="VFMSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="xmm {k}, xmm, xmm" xed="VFMSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="xmm {z}, xmm, xmm" xed="VFMSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="xmm {z}, xmm, xmm" xed="VFMSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="xmm {z}, xmm, xmm" xed="VFMSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="ymm {k}, ymm, ymm" xed="VFMSUBADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="ymm {k}, ymm, ymm" xed="VFMSUBADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="ymm {k}, ymm, ymm" xed="VFMSUBADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="ymm {k}, ymm, ymm" xed="VFMSUBADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="ymm {k}, ymm, ymm" xed="VFMSUBADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="ymm {k}, ymm, ymm" xed="VFMSUBADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="ymm {z}, ymm, ymm" xed="VFMSUBADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="ymm {z}, ymm, ymm" xed="VFMSUBADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="ymm {z}, ymm, ymm" xed="VFMSUBADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="xmm {k}, xmm, xmm" xed="VFMSUBADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="xmm {k}, xmm, xmm" xed="VFMSUBADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="xmm {k}, xmm, xmm" xed="VFMSUBADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1 
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="xmm {k}, xmm, xmm" xed="VFMSUBADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="xmm {k}, xmm, xmm" xed="VFMSUBADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="xmm {k}, xmm, xmm" xed="VFMSUBADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="xmm {z}, xmm, xmm" xed="VFMSUBADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="xmm {z}, xmm, xmm" xed="VFMSUBADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="xmm {z}, xmm, xmm" xed="VFMSUBADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="ymm {k}, ymm, ymm" xed="VFMSUBADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="ymm {k}, ymm, ymm" xed="VFMSUBADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="ymm {k}, ymm, ymm" xed="VFMSUBADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="ymm {k}, ymm, ymm" xed="VFMSUBADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="ymm {k}, ymm, ymm" xed="VFMSUBADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="ymm {k}, ymm, ymm" xed="VFMSUBADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="ymm {z}, ymm, ymm" xed="VFMSUBADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="ymm {z}, ymm, ymm" xed="VFMSUBADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="ymm {z}, ymm, ymm" xed="VFMSUBADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="xmm {k}, xmm, xmm" xed="VFMSUBADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="xmm {k}, xmm, xmm" xed="VFMSUBADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="xmm {k}, xmm, xmm" xed="VFMSUBADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="xmm {k}, xmm, xmm" xed="VFMSUBADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="xmm {k}, xmm, xmm" xed="VFMSUBADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="xmm {k}, xmm, xmm" xed="VFMSUBADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0) 
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="xmm {z}, xmm, xmm" xed="VFMSUBADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="xmm {z}, xmm, xmm" xed="VFMSUBADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="xmm {z}, xmm, xmm" xed="VFMSUBADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="ymm {k}, ymm, ymm" xed="VFNMADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="ymm {k}, ymm, ymm" xed="VFNMADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="ymm {k}, ymm, ymm" xed="VFNMADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="ymm {k}, ymm, ymm" xed="VFNMADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="ymm {k}, ymm, ymm" xed="VFNMADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="ymm {k}, ymm, ymm" xed="VFNMADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="ymm {z}, ymm, ymm" xed="VFNMADD132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="ymm {z}, ymm, ymm" xed="VFNMADD213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="ymm {z}, ymm, ymm" xed="VFNMADD231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="xmm {k}, xmm, xmm" xed="VFNMADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="xmm {k}, xmm, xmm" xed="VFNMADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="xmm {k}, xmm, xmm" xed="VFNMADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="xmm {k}, xmm, xmm" xed="VFNMADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="xmm {k}, xmm, xmm" xed="VFNMADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="xmm {k}, xmm, xmm" xed="VFNMADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="xmm {z}, xmm, xmm" xed="VFNMADD132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="xmm {z}, xmm, xmm" xed="VFNMADD213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="xmm {z}, xmm, xmm" xed="VFNMADD231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="ymm {k}, ymm, ymm" xed="VFNMADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="ymm {k}, ymm, ymm" xed="VFNMADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="ymm {k}, ymm, ymm" xed="VFNMADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="ymm {k}, ymm, ymm" xed="VFNMADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="ymm {k}, ymm, ymm" xed="VFNMADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="ymm {k}, ymm, ymm" xed="VFNMADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="ymm {z}, ymm, ymm" xed="VFNMADD132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="ymm {z}, ymm, ymm" xed="VFNMADD213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="ymm {z}, ymm, ymm" xed="VFNMADD231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="xmm {k}, xmm, xmm" xed="VFNMADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="xmm {k}, xmm, xmm" xed="VFNMADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="xmm {k}, xmm, xmm" xed="VFNMADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="xmm {k}, xmm, xmm" xed="VFNMADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="xmm {k}, xmm, xmm" xed="VFNMADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="xmm {k}, xmm, xmm" xed="VFNMADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="xmm {z}, xmm, xmm" xed="VFNMADD132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="xmm {z}, xmm, xmm" xed="VFNMADD213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="xmm {z}, xmm, xmm" xed="VFNMADD231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="ymm {k}, ymm, ymm" xed="VFNMSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="ymm {k}, ymm, ymm" xed="VFNMSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="ymm {k}, ymm, ymm" xed="VFNMSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="ymm {k}, ymm, ymm" xed="VFNMSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="ymm {k}, ymm, ymm" xed="VFNMSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="ymm {k}, ymm, ymm" xed="VFNMSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="ymm {z}, ymm, ymm" xed="VFNMSUB132PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="ymm {z}, ymm, ymm" xed="VFNMSUB213PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="ymm {z}, ymm, ymm" xed="VFNMSUB231PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="xmm {k}, xmm, xmm" xed="VFNMSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="xmm {k}, xmm, xmm" xed="VFNMSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="xmm {k}, xmm, xmm" xed="VFNMSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="xmm {k}, xmm, xmm" xed="VFNMSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="xmm {k}, xmm, xmm" xed="VFNMSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="xmm {k}, xmm, xmm" xed="VFNMSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="xmm {z}, xmm, xmm" xed="VFNMSUB132PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="xmm {z}, xmm, xmm" xed="VFNMSUB213PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="xmm {z}, xmm, xmm" xed="VFNMSUB231PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask3_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="ymm {k}, ymm, ymm" xed="VFNMSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="ymm {k}, ymm, ymm" xed="VFNMSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="ymm {k}, ymm, ymm" xed="VFNMSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="ymm {k}, ymm, ymm" xed="VFNMSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="ymm {k}, ymm, ymm" xed="VFNMSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="ymm {k}, ymm, ymm" xed="VFNMSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="ymm {z}, ymm, ymm" xed="VFNMSUB132PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="ymm {z}, ymm, ymm" xed="VFNMSUB213PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="ymm {z}, ymm, ymm" xed="VFNMSUB231PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="xmm {k}, xmm, xmm" xed="VFNMSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="xmm {k}, xmm, xmm" xed="VFNMSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="xmm {k}, xmm, xmm" xed="VFNMSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="xmm {k}, xmm, xmm" xed="VFNMSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="xmm {k}, xmm, xmm" xed="VFNMSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="xmm {k}, xmm, xmm" xed="VFNMSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="xmm {z}, xmm, xmm" xed="VFNMSUB132PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="xmm {z}, xmm, xmm" xed="VFNMSUB213PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="xmm {z}, xmm, xmm" xed="VFNMSUB231PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="ymm {k}, vm32x" xed="VGATHERDPD_YMMf64_MASKmskw_MEMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="xmm {k}, vm32x" xed="VGATHERDPD_XMMf64_MASKmskw_MEMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="ymm {k}, vm32y" xed="VGATHERDPS_YMMf32_MASKmskw_MEMf32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="xmm {k}, vm32x" xed="VGATHERDPS_XMMf32_MASKmskw_MEMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="ymm {k}, vm64y" xed="VGATHERQPD_YMMf64_MASKmskw_MEMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="xmm {k}, vm64x" xed="VGATHERQPD_XMMf64_MASKmskw_MEMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="xmm {k}, vm64y" xed="VGATHERQPS_XMMf32_MASKmskw_MEMf32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="xmm {k}, vm64x" xed="VGATHERQPS_XMMf32_MASKmskw_MEMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="ymm, ymm" xed="VGETEXPPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="ymm {k}, ymm" xed="VGETEXPPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="ymm {z}, ymm" xed="VGETEXPPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="xmm, xmm" xed="VGETEXPPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="xmm {k}, xmm" xed="VGETEXPPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="xmm {z}, xmm" xed="VGETEXPPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="ymm, ymm" xed="VGETEXPPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="ymm {k}, ymm" xed="VGETEXPPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="ymm {z}, ymm" xed="VGETEXPPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="xmm, xmm" xed="VGETEXPPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="xmm {k}, xmm" xed="VGETEXPPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="xmm {z}, xmm" xed="VGETEXPPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="ymm, ymm, imm8" xed="VGETMANTPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="ymm {k}, ymm, imm8" xed="VGETMANTPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="ymm {z}, ymm, imm8" xed="VGETMANTPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="xmm, xmm, imm8" xed="VGETMANTPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="xmm {k}, xmm, imm8" xed="VGETMANTPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="xmm {z}, xmm, imm8" xed="VGETMANTPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="ymm, ymm, imm8" xed="VGETMANTPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="ymm {k}, ymm, imm8" xed="VGETMANTPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="ymm {z}, ymm, imm8" xed="VGETMANTPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="xmm, xmm, imm8" xed="VGETMANTPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="xmm {k}, xmm, imm8" xed="VGETMANTPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="xmm {z}, xmm, imm8" xed="VGETMANTPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_insertf32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF32X4" form="ymm, ymm, xmm, imm8" xed="VINSERTF32X4_YMMf32_MASKmskw_YMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_insertf32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF32X4" form="ymm {k}, ymm, xmm, imm8" xed="VINSERTF32X4_YMMf32_MASKmskw_YMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_insertf32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTF32X4" form="ymm {z}, ymm, xmm, imm8" xed="VINSERTF32X4_YMMf32_MASKmskw_YMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_inserti32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI32X4" form="ymm, ymm, xmm, imm8" xed="VINSERTI32X4_YMMu32_MASKmskw_YMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_inserti32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI32X4" form="ymm {k}, ymm, xmm, imm8" xed="VINSERTI32X4_YMMu32_MASKmskw_YMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_inserti32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[255:0] := a[255:0]
+CASE (imm8[0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VINSERTI32X4" form="ymm {z}, ymm, xmm, imm8" xed="VINSERTI32X4_YMMu32_MASKmskw_YMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMAXPD" form="ymm {k}, ymm, ymm" xed="VMAXPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMAXPD" form="ymm {z}, ymm, ymm" xed="VMAXPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXPD" form="xmm {k}, xmm, xmm" xed="VMAXPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXPD" form="xmm {z}, xmm, xmm" xed="VMAXPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMAXPS" form="ymm {k}, ymm, ymm" xed="VMAXPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMAXPS" form="ymm {z}, ymm, ymm" xed="VMAXPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXPS" form="xmm {k}, xmm, xmm" xed="VMAXPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXPS" form="xmm {z}, xmm, xmm" xed="VMAXPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMINPD" form="ymm {k}, ymm, ymm" xed="VMINPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMINPD" form="ymm {z}, ymm, ymm" xed="VMINPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINPD" form="xmm {k}, xmm, xmm" xed="VMINPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINPD" form="xmm {z}, xmm, xmm" xed="VMINPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMINPS" form="ymm {k}, ymm, ymm" xed="VMINPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMINPS" form="ymm {z}, ymm, ymm" xed="VMINPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINPS" form="xmm {k}, xmm, xmm" xed="VMINPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINPS" form="xmm {z}, xmm, xmm" xed="VMINPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="ymm {k}, m256" xed="VMOVAPD_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mov_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="ymm {k}, ymm" xed="VMOVAPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_store_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPD" form="m256 {k}, ymm" xed="VMOVAPD_MEMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="ymm {z}, m256" xed="VMOVAPD_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mov_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="ymm {z}, ymm" xed="VMOVAPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="xmm {k}, m128" xed="VMOVAPD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mov_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="xmm {k}, xmm" xed="VMOVAPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_store_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPD" form="m128 {k}, xmm" xed="VMOVAPD_MEMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="xmm {z}, m128" xed="VMOVAPD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mov_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="xmm {z}, xmm" xed="VMOVAPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="ymm {k}, m256" xed="VMOVAPS_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mov_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="ymm {k}, ymm" xed="VMOVAPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_store_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPS" form="m256 {k}, ymm" xed="VMOVAPS_MEMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="ymm {z}, m256" xed="VMOVAPS_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mov_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="ymm {z}, ymm" xed="VMOVAPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="xmm {k}, m128" xed="VMOVAPS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mov_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="xmm {k}, xmm" xed="VMOVAPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_store_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPS" form="m128 {k}, xmm" xed="VMOVAPS_MEMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="xmm {z}, m128" xed="VMOVAPS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mov_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="xmm {z}, xmm" xed="VMOVAPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[63:0] := a[63:0]
+tmp[127:64] := a[63:0]
+tmp[191:128] := a[191:128]
+tmp[255:192] := a[191:128]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="ymm {k}, ymm" xed="VMOVDDUP_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[63:0] := a[63:0]
+tmp[127:64] := a[63:0]
+tmp[191:128] := a[191:128]
+tmp[255:192] := a[191:128]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="ymm {z}, ymm" xed="VMOVDDUP_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[63:0] := a[63:0]
+tmp[127:64] := a[63:0]
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="xmm {k}, xmm" xed="VMOVDDUP_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[63:0] := a[63:0]
+tmp[127:64] := a[63:0]
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="xmm {z}, xmm" xed="VMOVDDUP_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="ymm {k}, m256" xed="VMOVDQA32_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mov_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="ymm {k}, ymm" xed="VMOVDQA32_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_store_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA32" form="m256 {k}, ymm" xed="VMOVDQA32_MEMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="ymm {z}, m256" xed="VMOVDQA32_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mov_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="ymm {z}, ymm" xed="VMOVDQA32_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="xmm {k}, m64" xed="VMOVDQA32_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mov_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="xmm {k}, xmm" xed="VMOVDQA32_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_store_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA32" form="m128 {k}, xmm" xed="VMOVDQA32_MEMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="xmm {z}, m64" xed="VMOVDQA32_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mov_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="xmm {z}, xmm" xed="VMOVDQA32_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="ymm {k}, m64" xed="VMOVDQA64_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mov_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="ymm {k}, ymm" xed="VMOVDQA64_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_store_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA64" form="m256 {k}, ymm" xed="VMOVDQA64_MEMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="ymm {z}, m64" xed="VMOVDQA64_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mov_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="ymm {z}, ymm" xed="VMOVDQA64_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="xmm {k}, m64" xed="VMOVDQA64_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mov_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="xmm {k}, xmm" xed="VMOVDQA64_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_store_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA64" form="m128 {k}, xmm" xed="VMOVDQA64_MEMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="xmm {z}, m64" xed="VMOVDQA64_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mov_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="xmm {z}, xmm" xed="VMOVDQA64_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="ymm {k}, m64" xed="VMOVDQU32_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU32" form="m256 {k}, ymm" xed="VMOVDQU32_MEMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="ymm {z}, m64" xed="VMOVDQU32_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="xmm {k}, m64" xed="VMOVDQU32_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU32" form="m128 {k}, xmm" xed="VMOVDQU32_MEMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="xmm {z}, m64" xed="VMOVDQU32_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="ymm {k}, m64" xed="VMOVDQU64_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_storeu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU64" form="m256 {k}, ymm" xed="VMOVDQU64_MEMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="ymm {z}, m64" xed="VMOVDQU64_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="xmm {k}, m64" xed="VMOVDQU64_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_storeu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU64" form="m128 {k}, xmm" xed="VMOVDQU64_MEMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="xmm {z}, m64" xed="VMOVDQU64_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[63:32] 
+tmp[63:32] := a[63:32] 
+tmp[95:64] := a[127:96] 
+tmp[127:96] := a[127:96]
+tmp[159:128] := a[191:160] 
+tmp[191:160] := a[191:160] 
+tmp[223:192] := a[255:224] 
+tmp[255:224] := a[255:224]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="ymm {k}, ymm" xed="VMOVSHDUP_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[63:32] 
+tmp[63:32] := a[63:32] 
+tmp[95:64] := a[127:96] 
+tmp[127:96] := a[127:96]
+tmp[159:128] := a[191:160] 
+tmp[191:160] := a[191:160] 
+tmp[223:192] := a[255:224] 
+tmp[255:224] := a[255:224]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="ymm {z}, ymm" xed="VMOVSHDUP_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[63:32] 
+tmp[63:32] := a[63:32] 
+tmp[95:64] := a[127:96] 
+tmp[127:96] := a[127:96]
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="xmm {k}, xmm" xed="VMOVSHDUP_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[63:32] 
+tmp[63:32] := a[63:32] 
+tmp[95:64] := a[127:96] 
+tmp[127:96] := a[127:96]
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="xmm {z}, xmm" xed="VMOVSHDUP_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[31:0] 
+tmp[63:32] := a[31:0] 
+tmp[95:64] := a[95:64] 
+tmp[127:96] := a[95:64]
+tmp[159:128] := a[159:128] 
+tmp[191:160] := a[159:128] 
+tmp[223:192] := a[223:192] 
+tmp[255:224] := a[223:192]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="ymm {k}, ymm" xed="VMOVSLDUP_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[31:0] 
+tmp[63:32] := a[31:0] 
+tmp[95:64] := a[95:64] 
+tmp[127:96] := a[95:64]
+tmp[159:128] := a[159:128] 
+tmp[191:160] := a[159:128] 
+tmp[223:192] := a[223:192] 
+tmp[255:224] := a[223:192]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="ymm {z}, ymm" xed="VMOVSLDUP_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[31:0] 
+tmp[63:32] := a[31:0] 
+tmp[95:64] := a[95:64] 
+tmp[127:96] := a[95:64]
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="xmm {k}, xmm" xed="VMOVSLDUP_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[31:0] 
+tmp[63:32] := a[31:0] 
+tmp[95:64] := a[95:64] 
+tmp[127:96] := a[95:64]
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="xmm {z}, xmm" xed="VMOVSLDUP_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="ymm {k}, m64" xed="VMOVUPD_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_storeu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVUPD" form="m256 {k}, ymm" xed="VMOVUPD_MEMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="256"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="ymm {z}, m64" xed="VMOVUPD_YMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="xmm {k}, m64" xed="VMOVUPD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_storeu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVUPD" form="m128 {k}, xmm" xed="VMOVUPD_MEMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="xmm {z}, m64" xed="VMOVUPD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="ymm {k}, m64" xed="VMOVUPS_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_storeu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVUPS" form="m256 {k}, ymm" xed="VMOVUPS_MEMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="256"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="ymm {z}, m64" xed="VMOVUPS_YMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="xmm {k}, m64" xed="VMOVUPS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_storeu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVUPS" form="m128 {k}, xmm" xed="VMOVUPS_MEMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="xmm {z}, m64" xed="VMOVUPS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMULPD" form="ymm {k}, ymm, ymm" xed="VMULPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMULPD" form="ymm {z}, ymm, ymm" xed="VMULPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULPD" form="xmm {k}, xmm, xmm" xed="VMULPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULPD" form="xmm {z}, xmm, xmm" xed="VMULPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMULPS" form="ymm {k}, ymm, ymm" xed="VMULPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMULPS" form="ymm {z}, ymm, ymm" xed="VMULPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULPS" form="xmm {k}, xmm, xmm" xed="VMULPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULPS" form="xmm {z}, xmm, xmm" xed="VMULPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSD" form="ymm {k}, ymm" xed="VPABSD_YMMi32_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSD" form="ymm {z}, ymm" xed="VPABSD_YMMi32_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSD" form="xmm {k}, xmm" xed="VPABSD_XMMi32_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSD" form="xmm {z}, xmm" xed="VPABSD_XMMi32_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ABS(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSQ" form="ymm, ymm" xed="VPABSQ_YMMi64_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSQ" form="ymm {k}, ymm" xed="VPABSQ_YMMi64_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPABSQ" form="ymm {z}, ymm" xed="VPABSQ_YMMi64_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ABS(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSQ" form="xmm, xmm" xed="VPABSQ_XMMi64_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSQ" form="xmm {k}, xmm" xed="VPABSQ_XMMi64_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPABSQ" form="xmm {z}, xmm" xed="VPABSQ_XMMi64_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDD" form="ymm {k}, ymm, ymm" xed="VPADDD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDD" form="ymm {z}, ymm, ymm" xed="VPADDD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDD" form="xmm {k}, xmm, xmm" xed="VPADDD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDD" form="xmm {z}, xmm, xmm" xed="VPADDD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDQ" form="ymm {k}, ymm, ymm" xed="VPADDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPADDQ" form="ymm {z}, ymm, ymm" xed="VPADDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDQ" form="xmm {k}, xmm, xmm" xed="VPADDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPADDQ" form="xmm {z}, xmm, xmm" xed="VPADDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDD" form="ymm {k}, ymm, ymm" xed="VPANDD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDD" form="ymm {z}, ymm, ymm" xed="VPANDD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDD" form="xmm {k}, xmm, xmm" xed="VPANDD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDD" form="xmm {z}, xmm, xmm" xed="VPANDD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDND" form="ymm {k}, ymm, ymm" xed="VPANDND_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDND" form="ymm {z}, ymm, ymm" xed="VPANDND_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDND" form="xmm {k}, xmm, xmm" xed="VPANDND_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDND" form="xmm {z}, xmm, xmm" xed="VPANDND_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="ymm {k}, ymm, ymm" xed="VPANDNQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="ymm {z}, ymm, ymm" xed="VPANDNQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="xmm {k}, xmm, xmm" xed="VPANDNQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="xmm {z}, xmm, xmm" xed="VPANDNQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDQ" form="ymm {k}, ymm, ymm" xed="VPANDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPANDQ" form="ymm {z}, ymm, ymm" xed="VPANDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDQ" form="xmm {k}, xmm, xmm" xed="VPANDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPANDQ" form="xmm {z}, xmm, xmm" xed="VPANDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_blend_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDMD" form="ymm {k}, ymm, ymm" xed="VPBLENDMD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_blend_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBLENDMD" form="xmm {k}, xmm, xmm" xed="VPBLENDMD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_blend_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBLENDMQ" form="ymm {k}, ymm, ymm" xed="VPBLENDMQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_blend_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBLENDMQ" form="xmm {k}, xmm, xmm" xed="VPBLENDMQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="ymm {k}, xmm" xed="VPBROADCASTD_YMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="ymm {k}, r32" xed="VPBROADCASTD_YMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="ymm {z}, xmm" xed="VPBROADCASTD_YMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="ymm {z}, r32" xed="VPBROADCASTD_YMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="xmm {k}, xmm" xed="VPBROADCASTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="xmm {k}, r32" xed="VPBROADCASTD_XMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="xmm {z}, xmm" xed="VPBROADCASTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="xmm {z}, r32" xed="VPBROADCASTD_XMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="ymm {k}, xmm" xed="VPBROADCASTQ_YMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="ymm {k}, r64" xed="VPBROADCASTQ_YMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="ymm {z}, xmm" xed="VPBROADCASTQ_YMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="ymm {z}, r64" xed="VPBROADCASTQ_YMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="xmm {k}, xmm" xed="VPBROADCASTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="xmm {k}, r64" xed="VPBROADCASTQ_XMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="xmm {z}, xmm" xed="VPBROADCASTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="xmm {z}, r64" xed="VPBROADCASTQ_XMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, ymm, ymm" xed="VPCMPD_MASKmskw_MASKmskw_YMMi32_YMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, xmm, xmm" xed="VPCMPD_MASKmskw_MASKmskw_XMMi32_XMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, ymm, ymm" xed="VPCMPQ_MASKmskw_MASKmskw_YMMi64_YMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, xmm, xmm" xed="VPCMPQ_MASKmskw_MASKmskw_XMMi64_XMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, ymm, ymm" xed="VPCMPUD_MASKmskw_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, xmm, xmm" xed="VPCMPUD_MASKmskw_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmp_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpeq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpge_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpgt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmple_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmplt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cmpneq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmp_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpeq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpge_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpgt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmple_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmplt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cmpneq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, ymm, ymm" xed="VPCMPUQ_MASKmskw_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpeq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpge_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpgt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmple_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmplt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmpneq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpeq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpge_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpgt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmple_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmplt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmpneq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, xmm, xmm" xed="VPCMPUQ_MASKmskw_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compress_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := src[255:m]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSD" form="ymm {k}, ymm" xed="VPCOMPRESSD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compressstoreu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 32
+m := base_addr
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		MEM[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSD" form="m256 {k}, ymm" xed="VPCOMPRESSD_MEMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_compress_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSD" form="ymm {z}, ymm" xed="VPCOMPRESSD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compress_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := src[127:m]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSD" form="xmm {k}, xmm" xed="VPCOMPRESSD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compressstoreu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 32
+m := base_addr
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		MEM[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSD" form="m128 {k}, xmm" xed="VPCOMPRESSD_MEMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_compress_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSD" form="xmm {z}, xmm" xed="VPCOMPRESSD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compress_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := src[255:m]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="ymm {k}, ymm" xed="VPCOMPRESSQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compressstoreu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 64
+m := base_addr
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		MEM[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="m256 {k}, ymm" xed="VPCOMPRESSQ_MEMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_compress_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="ymm {z}, ymm" xed="VPCOMPRESSQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compress_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := src[127:m]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="xmm {k}, xmm" xed="VPCOMPRESSQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compressstoreu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 64
+m := base_addr
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		MEM[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="m128 {k}, xmm" xed="VPCOMPRESSQ_MEMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_compress_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="xmm {z}, xmm" xed="VPCOMPRESSQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutexvar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMD" form="ymm {k}, ymm, ymm" xed="VPERMD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutexvar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMD" form="ymm {z}, ymm, ymm" xed="VPERMD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutexvar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMD" form="ymm, ymm, ymm" xed="VPERMD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask2_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := idx[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="ymm {k}, ymm, ymm" xed="VPERMI2D_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMT2D" form="ymm {k}, ymm, ymm" xed="VPERMT2D_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="ymm {z}, ymm, ymm" xed="VPERMI2D_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<instruction name="VPERMT2D" form="ymm {z}, ymm, ymm" xed="VPERMT2D_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="ymm, ymm, ymm" xed="VPERMI2D_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<instruction name="VPERMT2D" form="ymm, ymm, ymm" xed="VPERMT2D_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask2_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := idx[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="xmm {k}, xmm, xmm" xed="VPERMI2D_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMT2D" form="xmm {k}, xmm, xmm" xed="VPERMT2D_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	IF k[j]
+		dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="xmm {z}, xmm, xmm" xed="VPERMI2D_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<instruction name="VPERMT2D" form="xmm {z}, xmm, xmm" xed="VPERMT2D_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="xmm, xmm, xmm" xed="VPERMI2D_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<instruction name="VPERMT2D" form="xmm, xmm, xmm" xed="VPERMT2D_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask2_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := idx[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="ymm {k}, ymm, ymm" xed="VPERMI2PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMT2PD" form="ymm {k}, ymm, ymm" xed="VPERMT2PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="ymm {z}, ymm, ymm" xed="VPERMI2PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VPERMT2PD" form="ymm {z}, ymm, ymm" xed="VPERMT2PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="ymm, ymm, ymm" xed="VPERMI2PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<instruction name="VPERMT2PD" form="ymm, ymm, ymm" xed="VPERMT2PD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask2_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := idx[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="xmm {k}, xmm, xmm" xed="VPERMI2PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMT2PD" form="xmm {k}, xmm, xmm" xed="VPERMT2PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	IF k[j]
+		dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="xmm {z}, xmm, xmm" xed="VPERMI2PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VPERMT2PD" form="xmm {z}, xmm, xmm" xed="VPERMT2PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="xmm, xmm, xmm" xed="VPERMI2PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VPERMT2PD" form="xmm, xmm, xmm" xed="VPERMT2PD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask2_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := idx[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="ymm {k}, ymm, ymm" xed="VPERMI2PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMT2PS" form="ymm {k}, ymm, ymm" xed="VPERMT2PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := (idx[i+3]) ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="ymm {z}, ymm, ymm" xed="VPERMI2PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VPERMT2PS" form="ymm {z}, ymm, ymm" xed="VPERMT2PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	off := idx[i+2:i]*32
+	dst[i+31:i] := idx[i+3] ? b[off+31:off] : a[off+31:off]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="ymm, ymm, ymm" xed="VPERMI2PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<instruction name="VPERMT2PS" form="ymm, ymm, ymm" xed="VPERMT2PS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask2_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := idx[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="xmm {k}, xmm, xmm" xed="VPERMI2PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMT2PS" form="xmm {k}, xmm, xmm" xed="VPERMT2PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	IF k[j]
+		dst[i+31:i] := (idx[i+2]) ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="xmm {z}, xmm, xmm" xed="VPERMI2PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VPERMT2PS" form="xmm {z}, xmm, xmm" xed="VPERMT2PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128i" varname="idx" etype="UI32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	off := idx[i+1:i]*32
+	dst[i+31:i] := idx[i+2] ? b[off+31:off] : a[off+31:off]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="xmm, xmm, xmm" xed="VPERMI2PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VPERMT2PS" form="xmm, xmm, xmm" xed="VPERMT2PS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask2_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := idx[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="ymm {k}, ymm, ymm" xed="VPERMI2Q_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMT2Q" form="ymm {k}, ymm, ymm" xed="VPERMT2Q_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := (idx[i+2]) ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="ymm {z}, ymm, ymm" xed="VPERMI2Q_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<instruction name="VPERMT2Q" form="ymm {z}, ymm, ymm" xed="VPERMT2Q_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	off := idx[i+1:i]*64
+	dst[i+63:i] := idx[i+2] ? b[off+63:off] : a[off+63:off]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="ymm, ymm, ymm" xed="VPERMI2Q_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<instruction name="VPERMT2Q" form="ymm, ymm, ymm" xed="VPERMT2Q_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask2_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := idx[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="xmm {k}, xmm, xmm" xed="VPERMI2Q_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMT2Q" form="xmm {k}, xmm, xmm" xed="VPERMT2Q_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	IF k[j]
+		dst[i+63:i] := (idx[i+1]) ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="xmm {z}, xmm, xmm" xed="VPERMI2Q_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<instruction name="VPERMT2Q" form="xmm {z}, xmm, xmm" xed="VPERMT2Q_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="idx" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	off := idx[i]*64
+	dst[i+63:i] := idx[i+1] ? b[off+63:off] : a[off+63:off]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="xmm, xmm, xmm" xed="VPERMI2Q_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<instruction name="VPERMT2Q" form="xmm, xmm, xmm" xed="VPERMT2Q_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="ymm {k}, ymm, imm8" xed="VPERMILPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="ymm {k}, ymm, ymm" xed="VPERMILPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="ymm {z}, ymm, imm8" xed="VPERMILPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="ymm {z}, ymm, ymm" xed="VPERMILPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="xmm {k}, xmm, imm8" xed="VPERMILPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="xmm {k}, xmm, xmm" xed="VPERMILPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="xmm {z}, xmm, imm8" xed="VPERMILPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="xmm {z}, xmm, xmm" xed="VPERMILPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="ymm {k}, ymm, imm8" xed="VPERMILPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], b[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], b[33:32])
+tmp_dst[95:64] := SELECT4(a[127:0], b[65:64])
+tmp_dst[127:96] := SELECT4(a[127:0], b[97:96])
+tmp_dst[159:128] := SELECT4(a[255:128], b[129:128])
+tmp_dst[191:160] := SELECT4(a[255:128], b[161:160])
+tmp_dst[223:192] := SELECT4(a[255:128], b[193:192])
+tmp_dst[255:224] := SELECT4(a[255:128], b[225:224])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="ymm {k}, ymm, ymm" xed="VPERMILPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="ymm {z}, ymm, imm8" xed="VPERMILPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], b[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], b[33:32])
+tmp_dst[95:64] := SELECT4(a[127:0], b[65:64])
+tmp_dst[127:96] := SELECT4(a[127:0], b[97:96])
+tmp_dst[159:128] := SELECT4(a[255:128], b[129:128])
+tmp_dst[191:160] := SELECT4(a[255:128], b[161:160])
+tmp_dst[223:192] := SELECT4(a[255:128], b[193:192])
+tmp_dst[255:224] := SELECT4(a[255:128], b[225:224])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="ymm {z}, ymm, ymm" xed="VPERMILPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="xmm {k}, xmm, imm8" xed="VPERMILPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], b[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], b[33:32])
+tmp_dst[95:64] := SELECT4(a[127:0], b[65:64])
+tmp_dst[127:96] := SELECT4(a[127:0], b[97:96])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="xmm {k}, xmm, xmm" xed="VPERMILPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="xmm {z}, xmm, imm8" xed="VPERMILPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], b[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], b[33:32])
+tmp_dst[95:64] := SELECT4(a[127:0], b[65:64])
+tmp_dst[127:96] := SELECT4(a[127:0], b[97:96])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="xmm {z}, xmm, xmm" xed="VPERMILPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm {k}, ymm, imm8" xed="VPERMPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutexvar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	id := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm {k}, ymm, ymm" xed="VPERMPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm {z}, ymm, imm8" xed="VPERMPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutexvar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	id := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm {z}, ymm, ymm" xed="VPERMPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm, ymm, imm8" xed="VPERMPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutexvar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	id := idx[i+1:i]*64
+	dst[i+63:i] := a[id+63:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPD" form="ymm, ymm, ymm" xed="VPERMPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutexvar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPS" form="ymm {k}, ymm, ymm" xed="VPERMPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutexvar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPS" form="ymm {z}, ymm, ymm" xed="VPERMPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutexvar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="idx" etype="UI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	id := idx[i+2:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMPS" form="ymm, ymm, ymm" xed="VPERMPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm {k}, ymm, imm8" xed="VPERMQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutexvar_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	id := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm {k}, ymm, ymm" xed="VPERMQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm {z}, ymm, imm8" xed="VPERMQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutexvar_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	id := idx[i+1:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm {z}, ymm, ymm" xed="VPERMQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm, ymm, imm8" xed="VPERMQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutexvar_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="idx" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	id := idx[i+1:i]*64
+	dst[i+63:i] := a[id+63:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMQ" form="ymm, ymm, ymm" xed="VPERMQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expand_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="ymm {k}, ymm" xed="VPEXPANDD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expandloadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="ymm {k}, m64" xed="VPEXPANDD_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expand_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="ymm {z}, ymm" xed="VPEXPANDD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expandloadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="ymm {z}, m64" xed="VPEXPANDD_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expand_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="xmm {k}, xmm" xed="VPEXPANDD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expandloadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="xmm {k}, m64" xed="VPEXPANDD_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expand_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="xmm {z}, xmm" xed="VPEXPANDD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expandloadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="xmm {z}, m64" xed="VPEXPANDD_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expand_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="ymm {k}, ymm" xed="VPEXPANDQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expandloadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="ymm {k}, m64" xed="VPEXPANDQ_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expand_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="ymm {z}, ymm" xed="VPEXPANDQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expandloadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="ymm {z}, m64" xed="VPEXPANDQ_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expand_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="xmm {k}, xmm" xed="VPEXPANDQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expandloadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="xmm {k}, m64" xed="VPEXPANDQ_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expand_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="xmm {z}, xmm" xed="VPEXPANDQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expandloadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="xmm {z}, m64" xed="VPEXPANDQ_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="ymm {k}, vm32y" xed="VPGATHERDD_YMMu32_MASKmskw_MEMu32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="xmm {k}, vm32x" xed="VPGATHERDD_XMMu32_MASKmskw_MEMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="ymm {k}, vm32x" xed="VPGATHERDQ_YMMu64_MASKmskw_MEMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="xmm {k}, vm32x" xed="VPGATHERDQ_XMMu64_MASKmskw_MEMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="xmm {k}, vm64y" xed="VPGATHERQD_XMMu32_MASKmskw_MEMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="xmm {k}, vm64x" xed="VPGATHERQD_XMMu32_MASKmskw_MEMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mmask_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="ymm {k}, vm64y" xed="VPGATHERQQ_YMMu64_MASKmskw_MEMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mmask_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="xmm {k}, vm64x" xed="VPGATHERQQ_XMMu64_MASKmskw_MEMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="ymm {k}, ymm, ymm" xed="VPMAXSD_YMMi32_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="ymm {z}, ymm, ymm" xed="VPMAXSD_YMMi32_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="xmm {k}, xmm, xmm" xed="VPMAXSD_XMMi32_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="xmm {z}, xmm, xmm" xed="VPMAXSD_XMMi32_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="ymm {k}, ymm, ymm" xed="VPMAXSQ_YMMi64_MASKmskw_YMMi64_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="ymm {z}, ymm, ymm" xed="VPMAXSQ_YMMi64_MASKmskw_YMMi64_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="ymm, ymm, ymm" xed="VPMAXSQ_YMMi64_MASKmskw_YMMi64_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="xmm {k}, xmm, xmm" xed="VPMAXSQ_XMMi64_MASKmskw_XMMi64_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="xmm {z}, xmm, xmm" xed="VPMAXSQ_XMMi64_MASKmskw_XMMi64_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="xmm, xmm, xmm" xed="VPMAXSQ_XMMi64_MASKmskw_XMMi64_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="ymm {k}, ymm, ymm" xed="VPMAXUD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="ymm {z}, ymm, ymm" xed="VPMAXUD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="xmm {k}, xmm, xmm" xed="VPMAXUD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="xmm {z}, xmm, xmm" xed="VPMAXUD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="ymm {k}, ymm, ymm" xed="VPMAXUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="ymm {z}, ymm, ymm" xed="VPMAXUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="ymm, ymm, ymm" xed="VPMAXUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="xmm {k}, xmm, xmm" xed="VPMAXUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="xmm {z}, xmm, xmm" xed="VPMAXUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="xmm, xmm, xmm" xed="VPMAXUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSD" form="ymm {k}, ymm, ymm" xed="VPMINSD_YMMi32_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSD" form="ymm {z}, ymm, ymm" xed="VPMINSD_YMMi32_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSD" form="xmm {k}, xmm, xmm" xed="VPMINSD_XMMi32_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSD" form="xmm {z}, xmm, xmm" xed="VPMINSD_XMMi32_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="ymm {k}, ymm, ymm" xed="VPMINSQ_YMMi64_MASKmskw_YMMi64_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="ymm {z}, ymm, ymm" xed="VPMINSQ_YMMi64_MASKmskw_YMMi64_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="ymm, ymm, ymm" xed="VPMINSQ_YMMi64_MASKmskw_YMMi64_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="xmm {k}, xmm, xmm" xed="VPMINSQ_XMMi64_MASKmskw_XMMi64_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="xmm {z}, xmm, xmm" xed="VPMINSQ_XMMi64_MASKmskw_XMMi64_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="xmm, xmm, xmm" xed="VPMINSQ_XMMi64_MASKmskw_XMMi64_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUD" form="ymm {k}, ymm, ymm" xed="VPMINUD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUD" form="ymm {z}, ymm, ymm" xed="VPMINUD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUD" form="xmm {k}, xmm, xmm" xed="VPMINUD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUD" form="xmm {z}, xmm, xmm" xed="VPMINUD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="ymm {k}, ymm, ymm" xed="VPMINUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="ymm {z}, ymm, ymm" xed="VPMINUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="ymm, ymm, ymm" xed="VPMINUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="xmm {k}, xmm, xmm" xed="VPMINUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="xmm {z}, xmm, xmm" xed="VPMINUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="xmm, xmm, xmm" xed="VPMINUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := Truncate8(a[i+31:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm, ymm" xed="VPMOVDB_XMMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm {k}, ymm" xed="VPMOVDB_XMMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVDB" form="m64 {k}, ymm" xed="VPMOVDB_MEMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm {z}, ymm" xed="VPMOVDB_XMMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := Truncate8(a[i+31:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm, xmm" xed="VPMOVDB_XMMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm {k}, xmm" xed="VPMOVDB_XMMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVDB" form="m32 {k}, xmm" xed="VPMOVDB_MEMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm {z}, xmm" xed="VPMOVDB_XMMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := Truncate16(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="xmm, ymm" xed="VPMOVDW_XMMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="xmm {k}, ymm" xed="VPMOVDW_XMMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVDW" form="m128 {k}, ymm" xed="VPMOVDW_MEMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="xmm {z}, ymm" xed="VPMOVDW_XMMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := Truncate16(a[i+31:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="xmm, xmm" xed="VPMOVDW_XMMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="xmm {k}, xmm" xed="VPMOVDW_XMMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVDW" form="m64 {k}, xmm" xed="VPMOVDW_MEMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="xmm {z}, xmm" xed="VPMOVDW_XMMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := Truncate8(a[i+63:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm, ymm" xed="VPMOVQB_XMMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm {k}, ymm" xed="VPMOVQB_XMMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQB" form="m32 {k}, ymm" xed="VPMOVQB_MEMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm {z}, ymm" xed="VPMOVQB_XMMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := Truncate8(a[i+63:i])
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm, xmm" xed="VPMOVQB_XMMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm {k}, xmm" xed="VPMOVQB_XMMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQB" form="m16 {k}, xmm" xed="VPMOVQB_MEMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm {z}, xmm" xed="VPMOVQB_XMMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := Truncate32(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="xmm, ymm" xed="VPMOVQD_XMMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Truncate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="xmm {k}, ymm" xed="VPMOVQD_XMMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQD" form="m128 {k}, ymm" xed="VPMOVQD_MEMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Truncate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="xmm {z}, ymm" xed="VPMOVQD_XMMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := Truncate32(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="xmm, xmm" xed="VPMOVQD_XMMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Truncate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="xmm {k}, xmm" xed="VPMOVQD_XMMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQD" form="m64 {k}, xmm" xed="VPMOVQD_MEMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Truncate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="xmm {z}, xmm" xed="VPMOVQD_XMMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := Truncate16(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm, ymm" xed="VPMOVQW_XMMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm {k}, ymm" xed="VPMOVQW_XMMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQW" form="m64 {k}, ymm" xed="VPMOVQW_MEMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm {z}, ymm" xed="VPMOVQW_XMMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := Truncate16(a[i+63:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm, xmm" xed="VPMOVQW_XMMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm {k}, xmm" xed="VPMOVQW_XMMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQW" form="m32 {k}, xmm" xed="VPMOVQW_MEMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm {z}, xmm" xed="VPMOVQW_XMMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := Saturate8(a[i+31:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm, ymm" xed="VPMOVSDB_XMMi8_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm {k}, ymm" xed="VPMOVSDB_XMMi8_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSDB" form="m64 {k}, ymm" xed="VPMOVSDB_MEMi8_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm {z}, ymm" xed="VPMOVSDB_XMMi8_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := Saturate8(a[i+31:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm, xmm" xed="VPMOVSDB_XMMi8_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm {k}, xmm" xed="VPMOVSDB_XMMi8_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSDB" form="m32 {k}, xmm" xed="VPMOVSDB_MEMi8_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm {z}, xmm" xed="VPMOVSDB_XMMi8_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := Saturate16(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="xmm, ymm" xed="VPMOVSDW_XMMi16_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="xmm {k}, ymm" xed="VPMOVSDW_XMMi16_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSDW" form="m128 {k}, ymm" xed="VPMOVSDW_MEMi16_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="xmm {z}, ymm" xed="VPMOVSDW_XMMi16_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := Saturate16(a[i+31:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="xmm, xmm" xed="VPMOVSDW_XMMi16_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="xmm {k}, xmm" xed="VPMOVSDW_XMMi16_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI16" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSDW" form="m64 {k}, xmm" xed="VPMOVSDW_MEMi16_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="xmm {z}, xmm" xed="VPMOVSDW_XMMi16_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := Saturate8(a[i+63:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm, ymm" xed="VPMOVSQB_XMMi8_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm {k}, ymm" xed="VPMOVSQB_XMMi8_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQB" form="m32 {k}, ymm" xed="VPMOVSQB_MEMi8_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm {z}, ymm" xed="VPMOVSQB_XMMi8_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := Saturate8(a[i+63:i])
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm, xmm" xed="VPMOVSQB_XMMi8_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm {k}, xmm" xed="VPMOVSQB_XMMi8_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQB" form="m16 {k}, xmm" xed="VPMOVSQB_MEMi8_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm {z}, xmm" xed="VPMOVSQB_XMMi8_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := Saturate32(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="xmm, ymm" xed="VPMOVSQD_XMMi32_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Saturate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="xmm {k}, ymm" xed="VPMOVSQD_XMMi32_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQD" form="m128 {k}, ymm" xed="VPMOVSQD_MEMi32_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Saturate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="xmm {z}, ymm" xed="VPMOVSQD_XMMi32_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := Saturate32(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="xmm, xmm" xed="VPMOVSQD_XMMi32_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Saturate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="xmm {k}, xmm" xed="VPMOVSQD_XMMi32_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI32" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQD" form="m64 {k}, xmm" xed="VPMOVSQD_MEMi32_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Saturate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="xmm {z}, xmm" xed="VPMOVSQD_XMMi32_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := Saturate16(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm, ymm" xed="VPMOVSQW_XMMi16_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm {k}, ymm" xed="VPMOVSQW_XMMi16_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtsepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI16" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQW" form="m64 {k}, ymm" xed="VPMOVSQW_MEMi16_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm {z}, ymm" xed="VPMOVSQW_XMMi16_MASKmskw_YMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := Saturate16(a[i+63:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm, xmm" xed="VPMOVSQW_XMMi16_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm {k}, xmm" xed="VPMOVSQW_XMMi16_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI16" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQW" form="m32 {k}, xmm" xed="VPMOVSQW_MEMi16_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm {z}, xmm" xed="VPMOVSQW_XMMi16_MASKmskw_XMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="ymm {k}, xmm" xed="VPMOVSXBD_YMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="ymm {z}, xmm" xed="VPMOVSXBD_YMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="xmm {k}, xmm" xed="VPMOVSXBD_XMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="xmm {z}, xmm" xed="VPMOVSXBD_XMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m256i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="ymm {k}, xmm" xed="VPMOVSXBQ_YMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="ymm {z}, xmm" xed="VPMOVSXBQ_YMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="xmm {k}, xmm" xed="VPMOVSXBQ_XMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="xmm {z}, xmm" xed="VPMOVSXBQ_XMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m256i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="ymm {k}, xmm" xed="VPMOVSXDQ_YMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="ymm {z}, xmm" xed="VPMOVSXDQ_YMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="xmm {k}, xmm" xed="VPMOVSXDQ_XMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="xmm {z}, xmm" xed="VPMOVSXDQ_XMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*16
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="ymm {k}, xmm" xed="VPMOVSXWD_YMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="ymm {z}, xmm" xed="VPMOVSXWD_YMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	l := j*16
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="xmm {k}, xmm" xed="VPMOVSXWD_XMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="xmm {z}, xmm" xed="VPMOVSXWD_XMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m256i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="ymm {k}, xmm" xed="VPMOVSXWQ_YMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="ymm {z}, xmm" xed="VPMOVSXWQ_YMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="xmm {k}, xmm" xed="VPMOVSXWQ_XMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="xmm {z}, xmm" xed="VPMOVSXWQ_XMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := SaturateU8(a[i+31:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm, ymm" xed="VPMOVUSDB_XMMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm {k}, ymm" xed="VPMOVUSDB_XMMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSDB" form="m64 {k}, ymm" xed="VPMOVUSDB_MEMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm {z}, ymm" xed="VPMOVUSDB_XMMu8_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := SaturateU8(a[i+31:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm, xmm" xed="VPMOVUSDB_XMMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm {k}, xmm" xed="VPMOVUSDB_XMMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSDB" form="m32 {k}, xmm" xed="VPMOVUSDB_MEMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm {z}, xmm" xed="VPMOVUSDB_XMMu8_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := SaturateU16(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="xmm, ymm" xed="VPMOVUSDW_XMMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="xmm {k}, ymm" xed="VPMOVUSDW_XMMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSDW" form="m128 {k}, ymm" xed="VPMOVUSDW_MEMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="xmm {z}, ymm" xed="VPMOVUSDW_XMMu16_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := SaturateU16(a[i+31:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="xmm, xmm" xed="VPMOVUSDW_XMMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="xmm {k}, xmm" xed="VPMOVUSDW_XMMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSDW" form="m64 {k}, xmm" xed="VPMOVUSDW_MEMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="xmm {z}, xmm" xed="VPMOVUSDW_XMMu16_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := SaturateU8(a[i+63:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm, ymm" xed="VPMOVUSQB_XMMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm {k}, ymm" xed="VPMOVUSQB_XMMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQB" form="m32 {k}, ymm" xed="VPMOVUSQB_MEMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm {z}, ymm" xed="VPMOVUSQB_XMMu8_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := SaturateU8(a[i+63:i])
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm, xmm" xed="VPMOVUSQB_XMMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm {k}, xmm" xed="VPMOVUSQB_XMMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQB" form="m16 {k}, xmm" xed="VPMOVUSQB_MEMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm {z}, xmm" xed="VPMOVUSQB_XMMu8_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := SaturateU32(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="xmm, ymm" xed="VPMOVUSQD_XMMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := SaturateU32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="xmm {k}, ymm" xed="VPMOVUSQD_XMMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQD" form="m128 {k}, ymm" xed="VPMOVUSQD_MEMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := SaturateU32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="xmm {z}, ymm" xed="VPMOVUSQD_XMMu32_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := SaturateU32(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="xmm, xmm" xed="VPMOVUSQD_XMMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := SaturateU32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="xmm {k}, xmm" xed="VPMOVUSQD_XMMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQD" form="m64 {k}, xmm" xed="VPMOVUSQD_MEMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := SaturateU32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="xmm {z}, xmm" xed="VPMOVUSQD_XMMu32_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := SaturateU16(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm, ymm" xed="VPMOVUSQW_XMMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm {k}, ymm" xed="VPMOVUSQW_XMMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtusepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQW" form="m64 {k}, ymm" xed="VPMOVUSQW_MEMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm {z}, ymm" xed="VPMOVUSQW_XMMu16_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := SaturateU16(a[i+63:i])
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm, xmm" xed="VPMOVUSQW_XMMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm {k}, xmm" xed="VPMOVUSQW_XMMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtusepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQW" form="m32 {k}, xmm" xed="VPMOVUSQW_MEMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm {z}, xmm" xed="VPMOVUSQW_XMMu16_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="ymm {k}, xmm" xed="VPMOVZXBD_YMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="ymm {z}, xmm" xed="VPMOVZXBD_YMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="xmm {k}, xmm" xed="VPMOVZXBD_XMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="xmm {z}, xmm" xed="VPMOVZXBD_XMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="ymm {k}, xmm" xed="VPMOVZXBQ_YMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="ymm {z}, xmm" xed="VPMOVZXBQ_YMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="xmm {k}, xmm" xed="VPMOVZXBQ_XMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="xmm {z}, xmm" xed="VPMOVZXBQ_XMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="ymm {k}, xmm" xed="VPMOVZXDQ_YMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+31:l])
+	ELSE 
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="ymm {z}, xmm" xed="VPMOVZXDQ_YMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="xmm {k}, xmm" xed="VPMOVZXDQ_XMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+31:l])
+	ELSE 
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="xmm {z}, xmm" xed="VPMOVZXDQ_XMMi64_MASKmskw_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="ymm {k}, xmm" xed="VPMOVZXWD_YMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="ymm {z}, xmm" xed="VPMOVZXWD_YMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="xmm {k}, xmm" xed="VPMOVZXWD_XMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="xmm {z}, xmm" xed="VPMOVZXWD_XMMi32_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="ymm {k}, xmm" xed="VPMOVZXWQ_YMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="ymm {z}, xmm" xed="VPMOVZXWQ_YMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="xmm {k}, xmm" xed="VPMOVZXWQ_XMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in the low 4 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="xmm {z}, xmm" xed="VPMOVZXWQ_XMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__m256i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="ymm {k}, ymm, ymm" xed="VPMULDQ_YMMi64_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="ymm {z}, ymm, ymm" xed="VPMULDQ_YMMi64_MASKmskw_YMMi32_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="xmm {k}, xmm, xmm" xed="VPMULDQ_XMMi64_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="xmm {z}, xmm, xmm" xed="VPMULDQ_XMMi64_MASKmskw_XMMi32_XMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp[63:0] := a[i+31:i] * b[i+31:i]
+		dst[i+31:i] := tmp[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLD" form="ymm {k}, ymm, ymm" xed="VPMULLD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp[63:0] := a[i+31:i] * b[i+31:i]
+		dst[i+31:i] := tmp[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULLD" form="ymm {z}, ymm, ymm" xed="VPMULLD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[63:0] := a[i+31:i] * b[i+31:i]
+		dst[i+31:i] := tmp[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLD" form="zmm {z}, zmm, zmm" xed="VPMULLD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp[63:0] := a[i+31:i] * b[i+31:i]
+		dst[i+31:i] := tmp[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLD" form="xmm {k}, xmm, xmm" xed="VPMULLD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp[63:0] := a[i+31:i] * b[i+31:i]
+		dst[i+31:i] := tmp[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULLD" form="xmm {z}, xmm, xmm" xed="VPMULLD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="ymm {k}, ymm, ymm" xed="VPMULUDQ_YMMu64_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="ymm {z}, ymm, ymm" xed="VPMULUDQ_YMMu64_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="xmm {k}, xmm, xmm" xed="VPMULUDQ_XMMu64_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="xmm {z}, xmm, xmm" xed="VPMULUDQ_XMMu64_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPORD" form="ymm {k}, ymm, ymm" xed="VPORD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPORD" form="ymm {z}, ymm, ymm" xed="VPORD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPORD" form="xmm {k}, xmm, xmm" xed="VPORD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPORD" form="xmm {z}, xmm, xmm" xed="VPORD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPORQ" form="ymm {k}, ymm, ymm" xed="VPORQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPORQ" form="ymm {z}, ymm, ymm" xed="VPORQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPORQ" form="xmm {k}, xmm, xmm" xed="VPORQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPORQ" form="xmm {z}, xmm, xmm" xed="VPORQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLD" form="ymm {k}, ymm, imm8" xed="VPROLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLD" form="ymm {z}, ymm, imm8" xed="VPROLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLD" form="ymm, ymm, imm8" xed="VPROLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLD" form="xmm {k}, xmm, imm8" xed="VPROLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLD" form="xmm {z}, xmm, imm8" xed="VPROLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLD" form="xmm, xmm, imm8" xed="VPROLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLQ" form="ymm {k}, ymm, imm8" xed="VPROLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLQ" form="ymm {z}, ymm, imm8" xed="VPROLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLQ" form="ymm, ymm, imm8" xed="VPROLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLQ" form="xmm {k}, xmm, imm8" xed="VPROLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLQ" form="xmm {z}, xmm, imm8" xed="VPROLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLQ" form="xmm, xmm, imm8" xed="VPROLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLVD" form="ymm {k}, ymm, ymm" xed="VPROLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLVD" form="ymm {z}, ymm, ymm" xed="VPROLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLVD" form="ymm, ymm, ymm" xed="VPROLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLVD" form="xmm {k}, xmm, xmm" xed="VPROLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLVD" form="xmm {z}, xmm, xmm" xed="VPROLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLVD" form="xmm, xmm, xmm" xed="VPROLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="ymm {k}, ymm, ymm" xed="VPROLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="ymm {z}, ymm, ymm" xed="VPROLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="ymm, ymm, ymm" xed="VPROLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="xmm {k}, xmm, xmm" xed="VPROLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="xmm {z}, xmm, xmm" xed="VPROLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="xmm, xmm, xmm" xed="VPROLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORD" form="ymm {k}, ymm, imm8" xed="VPRORD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORD" form="ymm {z}, ymm, imm8" xed="VPRORD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORD" form="ymm, ymm, imm8" xed="VPRORD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORD" form="xmm {k}, xmm, imm8" xed="VPRORD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORD" form="xmm {z}, xmm, imm8" xed="VPRORD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORD" form="xmm, xmm, imm8" xed="VPRORD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORQ" form="ymm {k}, ymm, imm8" xed="VPRORQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORQ" form="ymm {z}, ymm, imm8" xed="VPRORQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORQ" form="ymm, ymm, imm8" xed="VPRORQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORQ" form="xmm {k}, xmm, imm8" xed="VPRORQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORQ" form="xmm {z}, xmm, imm8" xed="VPRORQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORQ" form="xmm, xmm, imm8" xed="VPRORQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORVD" form="ymm {k}, ymm, ymm" xed="VPRORVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORVD" form="ymm {z}, ymm, ymm" xed="VPRORVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORVD" form="ymm, ymm, ymm" xed="VPRORVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORVD" form="xmm {k}, xmm, xmm" xed="VPRORVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORVD" form="xmm {z}, xmm, xmm" xed="VPRORVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORVD" form="xmm, xmm, xmm" xed="VPRORVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="ymm {k}, ymm, ymm" xed="VPRORVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="ymm {z}, ymm, ymm" xed="VPRORVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="ymm, ymm, ymm" xed="VPRORVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="xmm {k}, xmm, xmm" xed="VPRORVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="xmm {z}, xmm, xmm" xed="VPRORVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="xmm, xmm, xmm" xed="VPRORVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i32scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="vm32y, ymm" xed="VPSCATTERDD_MEMu32_MASKmskw_YMMu32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i32scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="vm32y {k}, ymm" xed="VPSCATTERDD_MEMu32_MASKmskw_YMMu32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i32scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="vm32x, xmm" xed="VPSCATTERDD_MEMu32_MASKmskw_XMMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i32scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="vm32x {k}, xmm" xed="VPSCATTERDD_MEMu32_MASKmskw_XMMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i32scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="vm32x, ymm" xed="VPSCATTERDQ_MEMu64_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i32scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="vm32x {k}, ymm" xed="VPSCATTERDQ_MEMu64_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i32scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="vm32x, xmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i32scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="vm32x {k}, xmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i64scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQD" form="vm64y, xmm" xed="VPSCATTERQD_MEMu32_MASKmskw_XMMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i64scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQD" form="vm64y {k}, xmm" xed="VPSCATTERQD_MEMu32_MASKmskw_XMMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i64scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQD" form="vm64x, xmm" xed="VPSCATTERQD_MEMu32_MASKmskw_XMMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i64scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQD" form="vm64x {k}, xmm" xed="VPSCATTERQD_MEMu32_MASKmskw_XMMu32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i64scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQQ" form="vm64y, ymm" xed="VPSCATTERQQ_MEMu64_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i64scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQQ" form="vm64y {k}, ymm" xed="VPSCATTERQQ_MEMu64_MASKmskw_YMMu64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i64scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQQ" form="vm64x, xmm" xed="VPSCATTERQQ_MEMu64_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i64scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQQ" form="vm64x {k}, xmm" xed="VPSCATTERQQ_MEMu64_MASKmskw_XMMu64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="ymm {k}, ymm, imm8" xed="VPSHUFD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="ymm {z}, ymm, imm8" xed="VPSHUFD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="xmm {k}, xmm, imm8" xed="VPSHUFD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="xmm {z}, xmm, imm8" xed="VPSHUFD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLD" form="ymm {k}, ymm, xmm" xed="VPSLLD_YMMu32_MASKmskw_YMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLD" form="ymm {k}, ymm, imm8" xed="VPSLLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLD" form="ymm {z}, ymm, xmm" xed="VPSLLD_YMMu32_MASKmskw_YMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLD" form="ymm {z}, ymm, imm8" xed="VPSLLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLD" form="xmm {k}, xmm, xmm" xed="VPSLLD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLD" form="xmm {k}, xmm, imm8" xed="VPSLLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLD" form="xmm {z}, xmm, xmm" xed="VPSLLD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLD" form="xmm {z}, xmm, imm8" xed="VPSLLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="ymm {k}, ymm, xmm" xed="VPSLLQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="ymm {k}, ymm, imm8" xed="VPSLLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="ymm {z}, ymm, xmm" xed="VPSLLQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="ymm {z}, ymm, imm8" xed="VPSLLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="xmm {k}, xmm, xmm" xed="VPSLLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="xmm {k}, xmm, imm8" xed="VPSLLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="xmm {z}, xmm, xmm" xed="VPSLLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="xmm {z}, xmm, imm8" xed="VPSLLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="ymm {k}, ymm, ymm" xed="VPSLLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="ymm {z}, ymm, ymm" xed="VPSLLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="xmm {k}, xmm, xmm" xed="VPSLLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="xmm {z}, xmm, xmm" xed="VPSLLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="ymm {k}, ymm, ymm" xed="VPSLLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="ymm {z}, ymm, ymm" xed="VPSLLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="xmm {k}, xmm, xmm" xed="VPSLLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="xmm {z}, xmm, xmm" xed="VPSLLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAD" form="ymm {k}, ymm, xmm" xed="VPSRAD_YMMu32_MASKmskw_YMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAD" form="ymm {k}, ymm, imm8" xed="VPSRAD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAD" form="ymm {z}, ymm, xmm" xed="VPSRAD_YMMu32_MASKmskw_YMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAD" form="ymm {z}, ymm, imm8" xed="VPSRAD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAD" form="xmm {k}, xmm, xmm" xed="VPSRAD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAD" form="xmm {k}, xmm, imm8" xed="VPSRAD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAD" form="xmm {z}, xmm, xmm" xed="VPSRAD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAD" form="xmm {z}, xmm, imm8" xed="VPSRAD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="ymm {k}, ymm, xmm" xed="VPSRAQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="ymm {k}, ymm, imm8" xed="VPSRAQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="ymm {z}, ymm, xmm" xed="VPSRAQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="ymm {z}, ymm, imm8" xed="VPSRAQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+	ELSE
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="ymm, ymm, xmm" xed="VPSRAQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+	ELSE
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="ymm, ymm, imm8" xed="VPSRAQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="xmm {k}, xmm, xmm" xed="VPSRAQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="xmm {k}, xmm, imm8" xed="VPSRAQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="xmm {z}, xmm, xmm" xed="VPSRAQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="xmm {z}, xmm, imm8" xed="VPSRAQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+	ELSE
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="xmm, xmm, xmm" xed="VPSRAQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+	ELSE
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="xmm, xmm, imm8" xed="VPSRAQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="ymm {k}, ymm, ymm" xed="VPSRAVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="ymm {z}, ymm, ymm" xed="VPSRAVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="xmm {k}, xmm, xmm" xed="VPSRAVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="xmm {z}, xmm, xmm" xed="VPSRAVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="ymm {k}, ymm, ymm" xed="VPSRAVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="ymm {z}, ymm, ymm" xed="VPSRAVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="SI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="ymm, ymm, ymm" xed="VPSRAVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="xmm {k}, xmm, xmm" xed="VPSRAVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="xmm {z}, xmm, xmm" xed="VPSRAVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="xmm, xmm, xmm" xed="VPSRAVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLD" form="ymm {k}, ymm, xmm" xed="VPSRLD_YMMu32_MASKmskw_YMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLD" form="ymm {k}, ymm, imm8" xed="VPSRLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLD" form="ymm {z}, ymm, xmm" xed="VPSRLD_YMMu32_MASKmskw_YMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLD" form="ymm {z}, ymm, imm8" xed="VPSRLD_YMMu32_MASKmskw_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLD" form="xmm {k}, xmm, xmm" xed="VPSRLD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLD" form="xmm {k}, xmm, imm8" xed="VPSRLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLD" form="xmm {z}, xmm, xmm" xed="VPSRLD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLD" form="xmm {z}, xmm, imm8" xed="VPSRLD_XMMu32_MASKmskw_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="ymm {k}, ymm, xmm" xed="VPSRLQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="ymm {k}, ymm, imm8" xed="VPSRLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="ymm {z}, ymm, xmm" xed="VPSRLQ_YMMu64_MASKmskw_YMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="ymm {z}, ymm, imm8" xed="VPSRLQ_YMMu64_MASKmskw_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="xmm {k}, xmm, xmm" xed="VPSRLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="xmm {k}, xmm, imm8" xed="VPSRLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="xmm {z}, xmm, xmm" xed="VPSRLQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="xmm {z}, xmm, imm8" xed="VPSRLQ_XMMu64_MASKmskw_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="ymm {k}, ymm, ymm" xed="VPSRLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="ymm {z}, ymm, ymm" xed="VPSRLVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="xmm {k}, xmm, xmm" xed="VPSRLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="xmm {z}, xmm, xmm" xed="VPSRLVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="ymm {k}, ymm, ymm" xed="VPSRLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="ymm {z}, ymm, ymm" xed="VPSRLVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="xmm {k}, xmm, xmm" xed="VPSRLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="xmm {z}, xmm, xmm" xed="VPSRLVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBD" form="ymm {k}, ymm, ymm" xed="VPSUBD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBD" form="ymm {z}, ymm, ymm" xed="VPSUBD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBD" form="xmm {k}, xmm, xmm" xed="VPSUBD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBD" form="xmm {z}, xmm, xmm" xed="VPSUBD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="ymm {k}, ymm, ymm" xed="VPSUBQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="ymm {z}, ymm, ymm" xed="VPSUBQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="xmm {k}, xmm, xmm" xed="VPSUBQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="xmm {z}, xmm, xmm" xed="VPSUBQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "src", "a", and "b" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		FOR h := 0 to 31
+			index[2:0] := (src[i+h] &lt;&lt; 2) OR (a[i+h] &lt;&lt; 1) OR b[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="ymm {k}, ymm, ymm, imm8" xed="VPTERNLOGD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		FOR h := 0 to 31
+			index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="ymm {z}, ymm, ymm, imm8" xed="VPTERNLOGD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	FOR h := 0 to 31
+		index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+		dst[i+h] := imm8[index[2:0]]
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="ymm, ymm, ymm, imm8" xed="VPTERNLOGD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "src", "a", and "b" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		FOR h := 0 to 31
+			index[2:0] := (src[i+h] &lt;&lt; 2) OR (a[i+h] &lt;&lt; 1) OR b[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="xmm {k}, xmm, xmm, imm8" xed="VPTERNLOGD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		FOR h := 0 to 31
+			index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="xmm {z}, xmm, xmm, imm8" xed="VPTERNLOGD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 32-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	FOR h := 0 to 31
+		index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+		dst[i+h] := imm8[index[2:0]]
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="xmm, xmm, xmm, imm8" xed="VPTERNLOGD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "src", "a", and "b" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		FOR h := 0 to 63
+			index[2:0] := (src[i+h] &lt;&lt; 2) OR (a[i+h] &lt;&lt; 1) OR b[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="ymm {k}, ymm, ymm, imm8" xed="VPTERNLOGQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		FOR h := 0 to 63
+			index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="ymm {z}, ymm, ymm, imm8" xed="VPTERNLOGQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	FOR h := 0 to 63
+		index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+		dst[i+h] := imm8[index[2:0]]
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="ymm, ymm, ymm, imm8" xed="VPTERNLOGQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "src", "a", and "b" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		FOR h := 0 to 63
+			index[2:0] := (src[i+h] &lt;&lt; 2) OR (a[i+h] &lt;&lt; 1) OR b[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="xmm {k}, xmm, xmm, imm8" xed="VPTERNLOGQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		FOR h := 0 to 63
+			index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="xmm {z}, xmm, xmm, imm8" xed="VPTERNLOGQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in "imm8". For each bit in each packed 64-bit integer, the corresponding bit from "a", "b", and "c" are used to form a 3 bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	FOR h := 0 to 63
+		index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+		dst[i+h] := imm8[index[2:0]]
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="xmm, xmm, xmm, imm8" xed="VPTERNLOGQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_test_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTMD" form="k {k}, ymm, ymm" xed="VPTESTMD_MASKmskw_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_test_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTMD" form="k, ymm, ymm" xed="VPTESTMD_MASKmskw_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_test_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTMD" form="k {k}, xmm, xmm" xed="VPTESTMD_MASKmskw_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_test_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTMD" form="k, xmm, xmm" xed="VPTESTMD_MASKmskw_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_test_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTMQ" form="k {k}, ymm, ymm" xed="VPTESTMQ_MASKmskw_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_test_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTMQ" form="k, ymm, ymm" xed="VPTESTMQ_MASKmskw_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_test_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPTESTMQ" form="k {k}, xmm, xmm" xed="VPTESTMQ_MASKmskw_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_test_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPTESTMQ" form="k, xmm, xmm" xed="VPTESTMQ_MASKmskw_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_testn_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTNMD" form="k {k}, ymm, ymm" xed="VPTESTNMD_MASKmskw_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_testn_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTNMD" form="k, ymm, ymm" xed="VPTESTNMD_MASKmskw_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_testn_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTNMD" form="k {k}, xmm, xmm" xed="VPTESTNMD_MASKmskw_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_testn_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTNMD" form="k, xmm, xmm" xed="VPTESTNMD_MASKmskw_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_testn_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k1[j]
+		k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTNMQ" form="k {k}, ymm, ymm" xed="VPTESTNMQ_MASKmskw_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_testn_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:4] := 0
+	</operation>
+	<instruction name="VPTESTNMQ" form="k, ymm, ymm" xed="VPTESTNMQ_MASKmskw_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_testn_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k1[j]
+		k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPTESTNMQ" form="k {k}, xmm, xmm" xed="VPTESTNMQ_MASKmskw_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_testn_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:2] := 0
+	</operation>
+	<instruction name="VPTESTNMQ" form="k, xmm, xmm" xed="VPTESTNMQ_MASKmskw_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="ymm {k}, ymm, ymm" xed="VPUNPCKHDQ_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="ymm {z}, ymm, ymm" xed="VPUNPCKHDQ_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="xmm {k}, xmm, xmm" xed="VPUNPCKHDQ_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="xmm {z}, xmm, xmm" xed="VPUNPCKHDQ_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="ymm {k}, ymm, ymm" xed="VPUNPCKHQDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="ymm {z}, ymm, ymm" xed="VPUNPCKHQDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="xmm {k}, xmm, xmm" xed="VPUNPCKHQDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="xmm {z}, xmm, xmm" xed="VPUNPCKHQDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="ymm {k}, ymm, ymm" xed="VPUNPCKLDQ_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="ymm {z}, ymm, ymm" xed="VPUNPCKLDQ_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="xmm {k}, xmm, xmm" xed="VPUNPCKLDQ_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="xmm {z}, xmm, xmm" xed="VPUNPCKLDQ_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="ymm {k}, ymm, ymm" xed="VPUNPCKLQDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="ymm {z}, ymm, ymm" xed="VPUNPCKLQDQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="xmm {k}, xmm, xmm" xed="VPUNPCKLQDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="xmm {z}, xmm, xmm" xed="VPUNPCKLQDQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXORD" form="ymm {k}, ymm, ymm" xed="VPXORD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXORD" form="ymm {z}, ymm, ymm" xed="VPXORD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPXORD" form="xmm {k}, xmm, xmm" xed="VPXORD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPXORD" form="xmm {z}, xmm, xmm" xed="VPXORD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXORQ" form="ymm {k}, ymm, ymm" xed="VPXORQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXORQ" form="ymm {z}, ymm, ymm" xed="VPXORQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPXORQ" form="xmm {k}, xmm, xmm" xed="VPXORQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPXORQ" form="xmm {z}, xmm, xmm" xed="VPXORQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="ymm {k}, ymm" xed="VRCP14PD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="ymm {z}, ymm" xed="VRCP14PD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := (1.0 / a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="ymm, ymm" xed="VRCP14PD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="xmm {k}, xmm" xed="VRCP14PD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="xmm {z}, xmm" xed="VRCP14PD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (1.0 / a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="xmm, xmm" xed="VRCP14PD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="ymm {k}, ymm" xed="VRCP14PS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="ymm {z}, ymm" xed="VRCP14PS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="ymm, ymm" xed="VRCP14PS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="xmm {k}, xmm" xed="VRCP14PS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="xmm {z}, xmm" xed="VRCP14PS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="xmm, xmm" xed="VRCP14PS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="ymm {k}, ymm, imm8" xed="VRNDSCALEPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="ymm {z}, ymm, imm8" xed="VRNDSCALEPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="ymm, ymm, imm8" xed="VRNDSCALEPD_YMMf64_MASKmskw_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="xmm {k}, xmm, imm8" xed="VRNDSCALEPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="xmm {z}, xmm, imm8" xed="VRNDSCALEPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="xmm, xmm, imm8" xed="VRNDSCALEPD_XMMf64_MASKmskw_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="ymm {k}, ymm, imm8" xed="VRNDSCALEPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="ymm {z}, ymm, imm8" xed="VRNDSCALEPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="ymm, ymm, imm8" xed="VRNDSCALEPS_YMMf32_MASKmskw_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="xmm {k}, xmm, imm8" xed="VRNDSCALEPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="xmm {z}, xmm, imm8" xed="VRNDSCALEPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="xmm, xmm, imm8" xed="VRNDSCALEPS_XMMf32_MASKmskw_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="ymm {k}, ymm" xed="VRSQRT14PD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="ymm {z}, ymm" xed="VRSQRT14PD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="xmm {k}, xmm" xed="VRSQRT14PD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="xmm {z}, xmm" xed="VRSQRT14PD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="ymm {k}, ymm" xed="VRSQRT14PS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="ymm {z}, ymm" xed="VRSQRT14PS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="xmm {k}, xmm" xed="VRSQRT14PS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="xmm {z}, xmm" xed="VRSQRT14PS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="ymm {k}, ymm, ymm" xed="VSCALEFPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="ymm {z}, ymm, ymm" xed="VSCALEFPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="ymm, ymm, ymm" xed="VSCALEFPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="xmm {k}, xmm, xmm" xed="VSCALEFPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="xmm {z}, xmm, xmm" xed="VSCALEFPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="xmm, xmm, xmm" xed="VSCALEFPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="ymm {k}, ymm, ymm" xed="VSCALEFPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="ymm {z}, ymm, ymm" xed="VSCALEFPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="ymm, ymm, ymm" xed="VSCALEFPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="xmm {k}, xmm, xmm" xed="VSCALEFPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="xmm {z}, xmm, xmm" xed="VSCALEFPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="xmm, xmm, xmm" xed="VSCALEFPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="vm32x, ymm" xed="VSCATTERDPD_MEMf64_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="vm32x {k}, ymm" xed="VSCATTERDPD_MEMf64_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="vm32x, xmm" xed="VSCATTERDPD_MEMf64_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="vm32x {k}, xmm" xed="VSCATTERDPD_MEMf64_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="vm32y, ymm" xed="VSCATTERDPS_MEMf32_MASKmskw_YMMf32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="vm32y {k}, ymm" xed="VSCATTERDPS_MEMf32_MASKmskw_YMMf32_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="vm32x, xmm" xed="VSCATTERDPS_MEMf32_MASKmskw_XMMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="vm32x {k}, xmm" xed="VSCATTERDPS_MEMf32_MASKmskw_XMMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPD" form="vm64y, ymm" xed="VSCATTERQPD_MEMf64_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPD" form="vm64y {k}, ymm" xed="VSCATTERQPD_MEMf64_MASKmskw_YMMf64_AVX512_VL256"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPD" form="vm64x, xmm" xed="VSCATTERQPD_MEMf64_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPD" form="vm64x {k}, xmm" xed="VSCATTERQPD_MEMf64_MASKmskw_XMMf64_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPS" form="vm64y, xmm" xed="VSCATTERQPS_MEMf32_MASKmskw_XMMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPS" form="vm64y {k}, xmm" xed="VSCATTERQPS_MEMf32_MASKmskw_XMMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPS" form="vm64x, xmm" xed="VSCATTERQPS_MEMf32_MASKmskw_XMMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="vindex" etype="SI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPS" form="vm64x {k}, xmm" xed="VSCATTERQPS_MEMf32_MASKmskw_XMMf32_AVX512_VL128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFF32X4" form="ymm {k}, ymm, ymm, imm8" xed="VSHUFF32X4_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFF32X4" form="ymm {z}, ymm, ymm, imm8" xed="VSHUFF32X4_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shuffle_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst.m128[0] := a.m128[imm8[0]]
+dst.m128[1] := b.m128[imm8[1]]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFF32X4" form="ymm, ymm, ymm, imm8" xed="VSHUFF32X4_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFF64X2" form="ymm {k}, ymm, ymm, imm8" xed="VSHUFF64X2_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFF64X2" form="ymm {z}, ymm, ymm, imm8" xed="VSHUFF64X2_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shuffle_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst.m128[0] := a.m128[imm8[0]]
+dst.m128[1] := b.m128[imm8[1]]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFF64X2" form="ymm, ymm, ymm, imm8" xed="VSHUFF64X2_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_i32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFI32X4" form="ymm {k}, ymm, ymm, imm8" xed="VSHUFI32X4_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_i32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFI32X4" form="ymm {z}, ymm, ymm, imm8" xed="VSHUFI32X4_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shuffle_i32x4">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst.m128[0] := a.m128[imm8[0]]
+dst.m128[1] := b.m128[imm8[1]]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFI32X4" form="ymm, ymm, ymm, imm8" xed="VSHUFI32X4_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_i64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFI64X2" form="ymm {k}, ymm, ymm, imm8" xed="VSHUFI64X2_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_i64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst.m128[0] := a.m128[imm8[0]]
+tmp_dst.m128[1] := b.m128[imm8[1]]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFI64X2" form="ymm {z}, ymm, ymm, imm8" xed="VSHUFI64X2_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shuffle_i64x2">
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst.m128[0] := a.m128[imm8[0]]
+dst.m128[1] := b.m128[imm8[1]]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFI64X2" form="ymm, ymm, ymm, imm8" xed="VSHUFI64X2_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
+tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="ymm {k}, ymm, ymm, imm8" xed="VSHUFPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
+tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="ymm {z}, ymm, ymm, imm8" xed="VSHUFPD_YMMf64_MASKmskw_YMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="xmm {k}, xmm, xmm, imm8" xed="VSHUFPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="xmm {z}, xmm, xmm, imm8" xed="VSHUFPD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements from "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="ymm {k}, ymm, ymm, imm8" xed="VSHUFPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="ymm {z}, ymm, ymm, imm8" xed="VSHUFPS_YMMf32_MASKmskw_YMMf32_YMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="xmm {k}, xmm, xmm, imm8" xed="VSHUFPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="xmm {z}, xmm, xmm, imm8" xed="VSHUFPS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="ymm {k}, ymm" xed="VSQRTPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="ymm {z}, ymm" xed="VSQRTPD_YMMf64_MASKmskw_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="xmm {k}, xmm" xed="VSQRTPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="xmm {z}, xmm" xed="VSQRTPD_XMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="ymm {k}, ymm" xed="VSQRTPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="ymm {z}, ymm" xed="VSQRTPS_YMMf32_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="xmm {k}, xmm" xed="VSQRTPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="xmm {z}, xmm" xed="VSQRTPS_XMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSUBPD" form="ymm {k}, ymm, ymm" xed="VSUBPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSUBPD" form="ymm {z}, ymm, ymm" xed="VSUBPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBPD" form="xmm {k}, xmm, xmm" xed="VSUBPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBPD" form="xmm {z}, xmm, xmm" xed="VSUBPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSUBPS" form="ymm {k}, ymm, ymm" xed="VSUBPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VSUBPS" form="ymm {z}, ymm, ymm" xed="VSUBPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBPS" form="xmm {k}, xmm, xmm" xed="VSUBPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBPS" form="xmm {z}, xmm, xmm" xed="VSUBPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="ymm {k}, ymm, ymm" xed="VUNPCKHPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="ymm {z}, ymm, ymm" xed="VUNPCKHPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="xmm {k}, xmm, xmm" xed="VUNPCKHPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="xmm {z}, xmm, xmm" xed="VUNPCKHPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="ymm {k}, ymm, ymm" xed="VUNPCKHPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="ymm {z}, ymm, ymm" xed="VUNPCKHPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="xmm {k}, xmm, xmm" xed="VUNPCKHPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="xmm {z}, xmm, xmm" xed="VUNPCKHPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="ymm {k}, ymm, ymm" xed="VUNPCKLPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="ymm {z}, ymm, ymm" xed="VUNPCKLPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="xmm {k}, xmm, xmm" xed="VUNPCKLPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="xmm {z}, xmm, xmm" xed="VUNPCKLPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="ymm {k}, ymm, ymm" xed="VUNPCKLPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="ymm {z}, ymm, ymm" xed="VUNPCKLPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="xmm {k}, xmm, xmm" xed="VUNPCKLPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="xmm {z}, xmm, xmm" xed="VUNPCKLPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQU64" form="m512, zmm" xed="VMOVDQU64_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQU32" form="m512, zmm" xed="VMOVDQU32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_storeu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQU64" form="m256, ymm" xed="VMOVDQU64_MEMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQU32" form="m256, ymm" xed="VMOVDQU32_MEMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_storeu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="VMOVDQU64" form="m128, xmm" xed="VMOVDQU64_MEMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory.
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="VMOVDQU32" form="m128, xmm" xed="VMOVDQU32_MEMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_store_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Store 256-bits (composed of 4 packed 64-bit integers) from "a" into memory.
+		"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQA64" form="m256, ymm" xed="VMOVDQA64_MEMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_store_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Store 256-bits (composed of 8 packed 32-bit integers) from "a" into memory.
+		"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+255:mem_addr] := a[255:0]
+	</operation>
+	<instruction name="VMOVDQA32" form="m256, ymm" xed="VMOVDQA32_MEMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_store_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store 128-bits (composed of 2 packed 64-bit integers) from "a" into memory.
+		"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="VMOVDQA64" form="m128, xmm" xed="VMOVDQA64_MEMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_store_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Store 128-bits (composed of 4 packed 32-bit integers) from "a" into memory.
+		"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="VMOVDQA32" form="m128, xmm" xed="VMOVDQA32_MEMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="zmm, m512" xed="VMOVDQU64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="zmm, m512" xed="VMOVDQU32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load 256-bits (composed of 4 packed 64-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="ymm, m256" xed="VMOVDQU64_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load 256-bits (composed of 8 packed 32-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="ymm, m256" xed="VMOVDQU32_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load 128-bits (composed of 2 packed 64-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="xmm, m128" xed="VMOVDQU64_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load 128-bits (composed of 4 packed 32-bit integers) from memory into "dst".
+		"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="xmm, m128" xed="VMOVDQU32_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="256"/>
+	<description>Load 256-bits (composed of 4 packed 64-bit integers) from memory into "dst".
+		"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="ymm, m256" xed="VMOVDQA64_YMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="256"/>
+	<description>Load 256-bits (composed of 8 packed 32-bit integers) from memory into "dst".
+		"mem_addr" must be aligned on a 32-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[255:0] := MEM[mem_addr+255:mem_addr]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="ymm, m256" xed="VMOVDQA32_YMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="128"/>
+	<description>Load 128-bits (composed of 2 packed 64-bit integers) from memory into "dst".
+		"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="xmm, m128" xed="VMOVDQA64_XMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<description>Load 128-bits (composed of 4 packed 32-bit integers) from memory into "dst".
+		"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="xmm, m128" xed="VMOVDQA32_XMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXORQ" form="ymm, ymm" xed="VPXORQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPXORD" form="ymm, ymm" xed="VPXORD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPXORQ" form="xmm, xmm" xed="VPXORQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPXORD" form="xmm, xmm" xed="VPXORD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPORQ" form="ymm, ymm" xed="VPORQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPORD" form="ymm, ymm" xed="VPORD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPORQ" form="xmm, xmm" xed="VPORQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPORD" form="xmm, xmm" xed="VPORD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_aesenclast_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<parameter type="__m512i" varname="RoundKey" etype="M128"/>
+	<description>Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*128
+	a[i+127:i] := ShiftRows(a[i+127:i])
+	a[i+127:i] := SubBytes(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VAESENCLAST" form="zmm, zmm" xed="VAESENCLAST_ZMMu128_ZMMu128_ZMMu128_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_aesenc_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<parameter type="__m512i" varname="RoundKey" etype="M128"/>
+	<description>Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*128
+	a[i+127:i] := ShiftRows(a[i+127:i])
+	a[i+127:i] := SubBytes(a[i+127:i])
+	a[i+127:i] := MixColumns(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VAESENC" form="zmm, zmm" xed="VAESENC_ZMMu128_ZMMu128_ZMMu128_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_aesdeclast_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<parameter type="__m512i" varname="RoundKey" etype="M128"/>
+	<description>Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*128
+	a[i+127:i] := InvShiftRows(a[i+127:i])
+	a[i+127:i] := InvSubBytes(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VAESDECLAST" form="zmm, zmm" xed="VAESDECLAST_ZMMu128_ZMMu128_ZMMu128_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_aesdec_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<parameter type="__m512i" varname="RoundKey" etype="M128"/>
+	<description>Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*128
+	a[i+127:i] := InvShiftRows(a[i+127:i])
+	a[i+127:i] := InvSubBytes(a[i+127:i])
+	a[i+127:i] := InvMixColumns(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VAESDEC" form="zmm, zmm" xed="VAESDEC_ZMMu128_ZMMu128_ZMMu128_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kand_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] AND b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KANDW" form="k, k, k" xed="KANDW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kandn_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit mask "a" and then AND with 16-bit mask "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := (NOT a[15:0]) AND b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KANDNW" form="k, k, k" xed="KANDNW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_knot_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit mask "a", and store the result in "k".</description>
+	<operation>
+k[15:0] := NOT a[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KNOTW" form="k, k" xed="KNOTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kor_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] OR b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KORW" form="k, k, k" xed="KORW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxnor_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := NOT (a[15:0] XOR b[15:0])
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KXNORW" form="k, k, k" xed="KXNORW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kxor_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] XOR b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KXORW" form="k, k, k" xed="KXORW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftli_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 16-bit mask "a" left by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 15
+	k[15:0] := a[15:0] &lt;&lt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTLW" form="k, k, imm8" xed="KSHIFTLW_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kshiftri_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="unsigned int" varname="count" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of 16-bit mask "a" right by "count" while shifting in zeros, and store the least significant 16 bits of the result in "k".</description>
+	<operation>
+k[MAX:0] := 0
+IF count[7:0] &lt;= 15
+	k[15:0] := a[15:0] &gt;&gt; count[7:0]
+FI
+	</operation>
+	<instruction name="KSHIFTRW" form="k, k, imm8" xed="KSHIFTRW_MASKmskw_MASKmskw_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_load_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16*" varname="mem_addr" etype="MASK" memwidth="16"/>
+	<description>Load 16-bit mask from memory into "k".</description>
+	<operation>
+k[15:0] := MEM[mem_addr+15:mem_addr]
+	</operation>
+	<instruction name="KMOVW" form="k, m16" xed="KMOVW_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_store_mask16">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__mmask16*" varname="mem_addr" etype="MASK" memwidth="16"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Store 16-bit mask from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+15:mem_addr] := a[15:0]
+	</operation>
+	<instruction name="KMOVW" form="m16, k" xed="KMOVW_MEMu16_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortest_mask16_u8">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<parameter type="unsigned char*" varname="all_ones" etype="UI8" memwidth="8"/>
+	<description>Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst". If the result is all ones, store 1 in "all_ones", otherwise store 0 in "all_ones".</description>
+	<operation>
+tmp[15:0] := a[15:0] OR b[15:0]
+IF tmp[15:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+IF tmp[15:0] == 0xFFFF
+	MEM[all_ones+7:all_ones] := 1
+ELSE
+	MEM[all_ones+7:all_ones] := 0
+FI
+	</operation>
+	<instruction name="KORTESTW" form="k, k" xed="KORTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestz_mask16_u8">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all zeros, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[15:0] := a[15:0] OR b[15:0]
+IF tmp[15:0] == 0x0
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTW" form="k, k" xed="KORTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_kortestc_mask16_u8">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 16-bit masks "a" and "b". If the result is all ones, store 1 in "dst", otherwise store 0 in "dst".</description>
+	<operation>
+tmp[15:0] := a[15:0] OR b[15:0]
+IF tmp[15:0] == 0xFFFF
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="KORTESTW" form="k, k" xed="KORTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtmask16_u32">
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Convert 16-bit mask "a" into an integer value, and store the result in "dst".</description>
+	<operation>
+dst := ZeroExtend32(a[15:0])
+	</operation>
+	<instruction name="KMOVW" form="r32, k" xed="KMOVW_GPR32u32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_cvtu32_mask16">
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="unsigned int" varname="a" etype="UI16"/>
+	<description>Convert integer value "a" into a 16-bit mask, and store the result in "k".</description>
+	<operation>
+k := ZeroExtend16(a[15:0])
+	</operation>
+	<instruction name="KMOVW" form="k, r32" xed="KMOVW_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kandn">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit mask "a" and then AND with 16-bit mask "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := (NOT a[15:0]) AND b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KANDNW" form="k, k, k" xed="KANDNW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kand">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] AND b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KANDW" form="k, k, k" xed="KANDW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kmov">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Copy 16-bit mask "a" to "k".</description>
+	<operation>
+k[15:0] := a[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KMOVW" form="k, k" xed="KMOVW_MASKmskw_MASKu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_knot">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit mask "a", and store the result in "k".</description>
+	<operation>
+k[15:0] := NOT a[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KNOTW" form="k, k" xed="KNOTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kor">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] OR b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KORW" form="k, k, k" xed="KORW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kunpackb">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Unpack and interleave 8 bits from masks "a" and "b", and store the 16-bit result in "k".</description>
+	<operation>
+k[7:0] := b[7:0]
+k[15:8] := a[7:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KUNPCKBW" form="k, k, k" xed="KUNPCKBW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kxnor">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := NOT (a[15:0] XOR b[15:0])
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KXNORW" form="k, k, k" xed="KXNORW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kxor">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] XOR b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KXORW" form="k, k, k" xed="KXORW_MASKmskw_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPD" form="zmm {z}, zmm, zmm" xed="VADDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPD" form="zmm {z}, zmm, zmm {er}" xed="VADDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPS" form="zmm {z}, zmm, zmm" xed="VADDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPS" form="zmm {z}, zmm, zmm {er}" xed="VADDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_add_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := a[63:0] + b[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSD" form="xmm, xmm, xmm {er}" xed="VADDSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] + b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSD" form="xmm {k}, xmm, xmm {er}" xed="VADDSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] + b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSD" form="xmm {k}, xmm, xmm" xed="VADDSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] + b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSD" form="xmm {z}, xmm, xmm {er}" xed="VADDSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] + b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSD" form="xmm {z}, xmm, xmm" xed="VADDSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_add_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+		[round_note]</description>
+	<operation>
+dst[31:0] := a[31:0] + b[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSS" form="xmm, xmm, xmm {er}" xed="VADDSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] + b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSS" form="xmm {k}, xmm, xmm {er}" xed="VADDSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_add_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] + b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSS" form="xmm {k}, xmm, xmm" xed="VADDSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] + b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSS" form="xmm {z}, xmm, xmm {er}" xed="VADDSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_add_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] + b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VADDSS" form="xmm {z}, xmm, xmm" xed="VADDSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Concatenate "a" and "b" into a 128-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+temp[1023:512] := a[511:0]
+temp[511:0] := b[511:0]
+temp[1023:0] := temp[1023:0] &gt;&gt; (32*imm8[3:0])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := temp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VALIGND" form="zmm {z}, zmm, zmm, imm8" xed="VALIGND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Concatenate "a" and "b" into a 128-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst".</description>
+	<operation>
+temp[1023:512] := a[511:0]
+temp[511:0] := b[511:0]
+temp[1023:0] := temp[1023:0] &gt;&gt; (64*imm8[2:0])
+dst[511:0] := temp[511:0]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="zmm, zmm, zmm, imm8" xed="VALIGNQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Concatenate "a" and "b" into a 128-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+temp[1023:512] := a[511:0]
+temp[511:0] := b[511:0]
+temp[1023:0] := temp[1023:0] &gt;&gt; (64*imm8[2:0])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := temp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="zmm {k}, zmm, zmm, imm8" xed="VALIGNQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_alignr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Concatenate "a" and "b" into a 128-byte intermediate result, shift the result right by "imm8" 64-bit elements, and store the low 64 bytes (8 elements) in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+temp[1023:512] := a[511:0]
+temp[511:0] := b[511:0]
+temp[1023:0] := temp[1023:0] &gt;&gt; (64*imm8[2:0])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := temp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VALIGNQ" form="zmm {z}, zmm, zmm, imm8" xed="VALIGNQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcast_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 4)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X4" form="zmm, m128" xed="VBROADCASTF32X4_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcast_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X4" form="zmm {k}, m128" xed="VBROADCASTF32X4_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcast_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the 4 packed single-precision (32-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF32X4" form="zmm {z}, m128" xed="VBROADCASTF32X4_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcast_f64x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 4)*64
+	dst[i+63:i] := a[n+63:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X4" form="zmm, m256" xed="VBROADCASTF64X4_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcast_f64x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 4)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X4" form="zmm {k}, m256" xed="VBROADCASTF64X4_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcast_f64x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Broadcast the 4 packed double-precision (64-bit) floating-point elements from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 4)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTF64X4" form="zmm {z}, m256" xed="VBROADCASTF64X4_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcast_i32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 4)*32
+	dst[i+31:i] := a[n+31:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X4" form="zmm, m128" xed="VBROADCASTI32X4_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcast_i32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X4" form="zmm {k}, m128" xed="VBROADCASTI32X4_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcast_i32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the 4 packed 32-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	n := (j % 4)*32
+	IF k[j]
+		dst[i+31:i] := a[n+31:n]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI32X4" form="zmm {z}, m128" xed="VBROADCASTI32X4_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcast_i64x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 4)*64
+	dst[i+63:i] := a[n+63:n]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X4" form="zmm, m256" xed="VBROADCASTI64X4_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcast_i64x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 4)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X4" form="zmm {k}, m256" xed="VBROADCASTI64X4_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcast_i64x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Broadcast the 4 packed 64-bit integers from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	n := (j % 4)*64
+	IF k[j]
+		dst[i+63:i] := a[n+63:n]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTI64X4" form="zmm {z}, m256" xed="VBROADCASTI64X4_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="zmm, xmm" xed="VBROADCASTSD_ZMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="zmm {k}, xmm" xed="VBROADCASTSD_ZMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcastsd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Broadcast the low double-precision (64-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTSD" form="zmm {z}, xmm" xed="VBROADCASTSD_ZMMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="zmm, xmm" xed="VBROADCASTSS_ZMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="zmm {k}, xmm" xed="VBROADCASTSS_ZMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcastss_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Broadcast the low single-precision (32-bit) floating-point element from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBROADCASTSS" form="zmm {z}, xmm" xed="VBROADCASTSS_ZMMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_round_sd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSD" form="k, xmm, xmm {sae}, imm8" xed="VCMPSD_MASKmskw_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_sd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSD" form="k, xmm, xmm, imm8" xed="VCMPSD_MASKmskw_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_round_sd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+IF k1[0]
+	k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0
+ELSE
+	k[0] := 0
+FI
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSD" form="k {k}, xmm, xmm {sae}, imm8" xed="VCMPSD_MASKmskw_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_sd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+IF k1[0]
+	k[0] := ( a[63:0] OP b[63:0] ) ? 1 : 0
+ELSE
+	k[0] := 0
+FI
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSD" form="k {k}, xmm, xmm, imm8" xed="VCMPSD_MASKmskw_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_round_ss_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k". [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSS" form="k, xmm, xmm {sae}, imm8" xed="VCMPSS_MASKmskw_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cmp_ss_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSS" form="k, xmm, xmm, imm8" xed="VCMPSS_MASKmskw_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_round_ss_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set). [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+IF k1[0]
+	k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0
+ELSE
+	k[0] := 0
+FI
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSS" form="k {k}, xmm, xmm {sae}, imm8" xed="VCMPSS_MASKmskw_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cmp_ss_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and store the result in mask vector "k" using zeromask "k1" (the element is zeroed out when mask bit 0 is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+IF k1[0]
+	k[0] := ( a[31:0] OP b[31:0] ) ? 1 : 0
+ELSE
+	k[0] := 0
+FI
+k[MAX:1] := 0
+	</operation>
+	<instruction name="VCMPSS" form="k {k}, xmm, xmm, imm8" xed="VCMPSS_MASKmskw_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_comi_round_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+RETURN ( a[63:0] OP b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="VCOMISD" form="xmm, xmm {sae}" xed="VCOMISD_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_comi_round_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", and return the boolean result (0 or 1). [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+RETURN ( a[31:0] OP b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="VCOMISS" form="xmm, xmm {sae}" xed="VCOMISS_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VUCOMISS" form="xmm, xmm {sae}" xed="VUCOMISS_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compress_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := src[511:m]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCOMPRESSPD" form="zmm {k}, zmm" xed="VCOMPRESSPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compressstoreu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 64
+m := base_addr
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		MEM[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VCOMPRESSPD" form="m512 {k}, zmm" xed="VCOMPRESSPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_compress_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Contiguously store the active double-precision (64-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := 0
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCOMPRESSPD" form="zmm {z}, zmm" xed="VCOMPRESSPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compress_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := src[511:m]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCOMPRESSPS" form="zmm {k}, zmm" xed="VCOMPRESSPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compressstoreu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 32
+m := base_addr
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		MEM[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VCOMPRESSPS" form="m512 {k}, zmm" xed="VCOMPRESSPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_compress_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Contiguously store the active single-precision (32-bit) floating-point elements in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := 0
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCOMPRESSPS" form="zmm {z}, zmm" xed="VCOMPRESSPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="zmm, ymm" xed="VCVTDQ2PD_ZMMf64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+	ELSE
+		dst[m+63:m] := src[m+63:m]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="zmm {k}, ymm" xed="VCVTDQ2PD_ZMMf64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+	ELSE
+		dst[m+63:m] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="zmm {z}, ymm" xed="VCVTDQ2PD_ZMMf64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="zmm, zmm {er}" xed="VCVTDQ2PS_ZMMf32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="zmm, zmm" xed="VCVTDQ2PS_ZMMf32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="zmm {k}, zmm {er}" xed="VCVTDQ2PS_ZMMf32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="zmm {k}, zmm" xed="VCVTDQ2PS_ZMMf32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="zmm {z}, zmm {er}" xed="VCVTDQ2PS_ZMMf32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PS" form="zmm {z}, zmm" xed="VCVTDQ2PS_ZMMf32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="ymm, zmm {er}" xed="VCVTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="ymm, zmm" xed="VCVTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="ymm {k}, zmm {er}" xed="VCVTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="ymm {k}, zmm" xed="VCVTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="ymm {z}, zmm {er}" xed="VCVTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2DQ" form="ymm {z}, zmm" xed="VCVTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="ymm, zmm {er}" xed="VCVTPD2PS_YMMf32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="ymm, zmm" xed="VCVTPD2PS_YMMf32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="ymm {k}, zmm {er}" xed="VCVTPD2PS_YMMf32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="ymm {k}, zmm" xed="VCVTPD2PS_YMMf32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="ymm {z}, zmm {er}" xed="VCVTPD2PS_YMMf32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_FP32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="ymm {z}, zmm" xed="VCVTPD2PS_YMMf32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="ymm, zmm {er}" xed="VCVTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="ymm, zmm" xed="VCVTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="ymm {k}, zmm {er}" xed="VCVTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="ymm {k}, zmm" xed="VCVTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="ymm {z}, zmm {er}" xed="VCVTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPD2UDQ" form="ymm {z}, zmm" xed="VCVTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="a" etype="FP16"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*16
+	dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="zmm, ymm {sae}" xed="VCVTPH2PS_ZMMf32_MASKmskw_YMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m256i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*16
+	dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="zmm, ymm" xed="VCVTPH2PS_ZMMf32_MASKmskw_YMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="FP16"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="zmm {k}, ymm {sae}" xed="VCVTPH2PS_ZMMf32_MASKmskw_YMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="zmm {k}, ymm" xed="VCVTPH2PS_ZMMf32_MASKmskw_YMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="FP16"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="zmm {z}, ymm {sae}" xed="VCVTPH2PS_ZMMf32_MASKmskw_YMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*16
+	IF k[j]
+		dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="zmm {z}, ymm" xed="VCVTPH2PS_ZMMf32_MASKmskw_YMMf16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="zmm, zmm {er}" xed="VCVTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="zmm, zmm" xed="VCVTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="zmm {k}, zmm {er}" xed="VCVTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="zmm {k}, zmm" xed="VCVTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="zmm {z}, zmm {er}" xed="VCVTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2DQ" form="zmm {z}, zmm" xed="VCVTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm, ymm {sae}" xed="VCVTPS2PD_ZMMf64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm, ymm" xed="VCVTPS2PD_ZMMf64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm {k}, ymm {sae}" xed="VCVTPS2PD_ZMMf64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm {k}, ymm" xed="VCVTPS2PD_ZMMf64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm {z}, ymm {sae}" xed="VCVTPS2PD_ZMMf64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := Convert_FP32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm {z}, ymm" xed="VCVTPS2PD_ZMMf64_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 32*j
+	dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="ymm, zmm {sae}" xed="VCVTPS2PH_YMMf16_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 32*j
+	dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="ymm, zmm {sae}" xed="VCVTPS2PH_YMMf16_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="ymm {k}, zmm {sae}" xed="VCVTPS2PH_YMMf16_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="ymm {k}, zmm {sae}" xed="VCVTPS2PH_YMMf16_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="ymm {z}, zmm {sae}" xed="VCVTPS2PH_YMMf16_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 16*j
+	l := 32*j
+	IF k[j]
+		dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="ymm {z}, zmm {sae}" xed="VCVTPS2PH_YMMf16_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="zmm, zmm {er}" xed="VCVTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="zmm, zmm" xed="VCVTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="zmm {k}, zmm {er}" xed="VCVTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="zmm {k}, zmm" xed="VCVTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="zmm {z}, zmm {er}" xed="VCVTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2UDQ" form="zmm {z}, zmm" xed="VCVTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2SI" form="r32, xmm {er}" xed="VCVTSD2SI_GPR32i32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2SI" form="r64, xmm {er}" xed="VCVTSD2SI_GPR64i64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2SI" form="r32, xmm {er}" xed="VCVTSD2SI_GPR32i32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2SI" form="r64, xmm {er}" xed="VCVTSD2SI_GPR64i64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsd_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2SI" form="r32, xmm" xed="VCVTSD2SI_GPR32i32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsd_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2SI" form="r64, xmm" xed="VCVTSD2SI_GPR64i64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSD2SS" form="xmm, xmm, xmm {er}" xed="VCVTSD2SS_XMMf32_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvt_roundsd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := Convert_FP64_To_FP32(b[63:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSD2SS" form="xmm {k}, xmm, xmm {er}" xed="VCVTSD2SS_XMMf32_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtsd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := Convert_FP64_To_FP32(b[63:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSD2SS" form="xmm {k}, xmm, xmm" xed="VCVTSD2SS_XMMf32_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvt_roundsd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := Convert_FP64_To_FP32(b[63:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSD2SS" form="xmm {z}, xmm, xmm {er}" xed="VCVTSD2SS_XMMf32_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtsd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := Convert_FP64_To_FP32(b[63:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSD2SS" form="xmm {z}, xmm, xmm" xed="VCVTSD2SS_XMMf32_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_UInt32(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2USI" form="r32, xmm {er}" xed="VCVTSD2USI_GPR32u32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsd_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_UInt64(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2USI" form="r64, xmm {er}" xed="VCVTSD2USI_GPR64u64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsd_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_UInt32(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2USI" form="r32, xmm" xed="VCVTSD2USI_GPR32u32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtsd_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_UInt64(a[63:0])
+	</operation>
+	<instruction name="VCVTSD2USI" form="r64, xmm" xed="VCVTSD2USI_GPR64u64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundi64_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SD" form="xmm, xmm, r64 {er}" xed="VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsi64_sd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". 
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SD" form="xmm, xmm, r64 {er}" xed="VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvti32_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<description>Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int32_To_FP64(b[31:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SD" form="xmm, xmm, r32" xed="VCVTSI2SD_XMMf64_XMMf64_GPR32i32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvti64_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<description>Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SD" form="xmm, xmm, r64" xed="VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundi32_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SS" form="xmm, xmm, r32 {er}" xed="VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundi64_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SS" form="xmm, xmm, r64 {er}" xed="VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsi32_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SS" form="xmm, xmm, r32 {er}" xed="VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundsi64_ss">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SS" form="xmm, xmm, r64 {er}" xed="VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvti32_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<description>Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SS" form="xmm, xmm, r32" xed="VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvti64_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<description>Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSI2SS" form="xmm, xmm, r64" xed="VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". 
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_FP64(b[31:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSS2SD" form="xmm, xmm, xmm {sae}" xed="VCVTSS2SD_XMMf64_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvt_roundss_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := Convert_FP32_To_FP64(b[31:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSS2SD" form="xmm {k}, xmm, xmm {sae}" xed="VCVTSS2SD_XMMf64_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtss_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := Convert_FP32_To_FP64(b[31:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSS2SD" form="xmm {k}, xmm, xmm" xed="VCVTSS2SD_XMMf64_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvt_roundss_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". 
+	[sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := Convert_FP32_To_FP64(b[31:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSS2SD" form="xmm {z}, xmm, xmm {sae}" xed="VCVTSS2SD_XMMf64_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtss_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := Convert_FP32_To_FP64(b[31:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTSS2SD" form="xmm {z}, xmm, xmm" xed="VCVTSS2SD_XMMf64_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2SI" form="r32, xmm {er}" xed="VCVTSS2SI_GPR32i32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2SI" form="r64, xmm {er}" xed="VCVTSS2SI_GPR64i64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2SI" form="r32, xmm {er}" xed="VCVTSS2SI_GPR32i32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2SI" form="r64, xmm {er}" xed="VCVTSS2SI_GPR64i64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtss_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2SI" form="r32, xmm" xed="VCVTSS2SI_GPR32i32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtss_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2SI" form="r64, xmm" xed="VCVTSS2SI_GPR64i64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_UInt32(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2USI" form="r32, xmm {er}" xed="VCVTSS2USI_GPR32u32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundss_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_UInt64(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2USI" form="r64, xmm {er}" xed="VCVTSS2USI_GPR64u64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtss_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_UInt32(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2USI" form="r32, xmm" xed="VCVTSS2USI_GPR32u32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtss_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_UInt64(a[31:0])
+	</operation>
+	<instruction name="VCVTSS2USI" form="r64, xmm" xed="VCVTSS2USI_GPR64u64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".  [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="ymm, zmm {sae}" xed="VCVTTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="ymm, zmm" xed="VCVTTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="ymm {k}, zmm {sae}" xed="VCVTTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="ymm {k}, zmm" xed="VCVTTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="ymm {z}, zmm {sae}" xed="VCVTTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2DQ" form="ymm {z}, zmm" xed="VCVTTPD2DQ_YMMi32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".  [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="ymm, zmm {sae}" xed="VCVTTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[k+63:k])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="ymm, zmm" xed="VCVTTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).   [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="ymm {k}, zmm {sae}" xed="VCVTTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="ymm {k}, zmm" xed="VCVTTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="ymm {z}, zmm {sae}" xed="VCVTTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttpd_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 32*j
+	l := 64*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP64_To_UInt32_Truncate(a[l+63:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTTPD2UDQ" form="ymm {z}, zmm" xed="VCVTTPD2UDQ_YMMu32_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="zmm, zmm {sae}" xed="VCVTTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="zmm, zmm" xed="VCVTTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="zmm {k}, zmm {sae}" xed="VCVTTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="zmm {k}, zmm" xed="VCVTTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="zmm {z}, zmm {sae}" xed="VCVTTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2DQ" form="zmm {z}, zmm" xed="VCVTTPS2DQ_ZMMi32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtt_roundps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="zmm, zmm {sae}" xed="VCVTTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="zmm, zmm" xed="VCVTTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtt_roundps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).   [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="zmm {k}, zmm {sae}" xed="VCVTTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="zmm {k}, zmm" xed="VCVTTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtt_roundps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="zmm {z}, zmm {sae}" xed="VCVTTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvttps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed unsigned 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_FP32_To_UInt32_Truncate(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTTPS2UDQ" form="zmm {z}, zmm" xed="VCVTTPS2UDQ_ZMMu32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundsd_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2SI" form="r32, xmm {sae}" xed="VCVTTSD2SI_GPR32i32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundsd_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2SI" form="r64, xmm {sae}" xed="VCVTTSD2SI_GPR64i64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundsd_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2SI" form="r32, xmm {sae}" xed="VCVTTSD2SI_GPR32i32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundsd_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2SI" form="r64, xmm {sae}" xed="VCVTTSD2SI_GPR64i64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttsd_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2SI" form="r32, xmm" xed="VCVTTSD2SI_GPR32i32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttsd_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2SI" form="r64, xmm" xed="VCVTTSD2SI_GPR64i64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundsd_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2USI" form="r32, xmm {sae}" xed="VCVTTSD2USI_GPR32u32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundsd_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2USI" form="r64, xmm {sae}" xed="VCVTTSD2USI_GPR64u64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttsd_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_UInt32_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2USI" form="r32, xmm" xed="VCVTTSD2USI_GPR32u32_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttsd_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_UInt64_Truncate(a[63:0])
+	</operation>
+	<instruction name="VCVTTSD2USI" form="r64, xmm" xed="VCVTTSD2USI_GPR64u64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundss_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2SI" form="r32, xmm {sae}" xed="VCVTTSS2SI_GPR32i32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundss_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2SI" form="r64, xmm {sae}" xed="VCVTTSS2SI_GPR64i64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundss_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2SI" form="r32, xmm {sae}" xed="VCVTTSS2SI_GPR32i32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundss_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2SI" form="r64, xmm {sae}" xed="VCVTTSS2SI_GPR64i64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttss_i32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2SI" form="r32, xmm" xed="VCVTTSS2SI_GPR32i32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttss_i64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2SI" form="r64, xmm" xed="VCVTTSS2SI_GPR64i64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundss_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2USI" form="r32, xmm {sae}" xed="VCVTTSS2USI_GPR32u32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtt_roundss_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst".
+	[sae_note]</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2USI" form="r64, xmm {sae}" xed="VCVTTSS2USI_GPR64u64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttss_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_UInt32_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2USI" form="r32, xmm" xed="VCVTTSS2USI_GPR32u32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvttss_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to an unsigned 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_UInt64_Truncate(a[31:0])
+	</operation>
+	<instruction name="VCVTTSS2USI" form="r64, xmm" xed="VCVTTSS2USI_GPR64u64_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="zmm, ymm" xed="VCVTUDQ2PD_ZMMf64_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="zmm {k}, ymm" xed="VCVTUDQ2PD_ZMMf64_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[i+63:i] := Convert_Int32_To_FP64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="zmm {z}, ymm" xed="VCVTUDQ2PD_ZMMf64_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvt_roundepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PS" form="zmm, zmm {er}" xed="VCVTUDQ2PS_ZMMf32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PS" form="zmm, zmm" xed="VCVTUDQ2PS_ZMMf32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvt_roundepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PS" form="zmm {k}, zmm {er}" xed="VCVTUDQ2PS_ZMMf32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PS" form="zmm {k}, zmm" xed="VCVTUDQ2PS_ZMMf32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvt_roundepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PS" form="zmm {z}, zmm {er}" xed="VCVTUDQ2PS_ZMMf32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PS" form="zmm {z}, zmm" xed="VCVTUDQ2PS_ZMMf32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundu64_sd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". 
+	[round_note]</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SD" form="xmm, xmm, r64 {er}" xed="VCVTUSI2SD_XMMf64_XMMf64_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtu32_sd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<description>Convert the unsigned 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int32_To_FP64(b[31:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SD" form="xmm, xmm, r32" xed="VCVTUSI2SD_XMMf64_XMMf64_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtu64_sd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<description>Convert the unsigned 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SD" form="xmm, xmm, r64" xed="VCVTUSI2SD_XMMf64_XMMf64_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundu32_ss">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SS" form="xmm, xmm, r32 {er}" xed="VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvt_roundu64_ss">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SS" form="xmm, xmm, r64 {er}" xed="VCVTUSI2SS_XMMf32_XMMf32_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtu32_ss">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<description>Convert the unsigned 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SS" form="xmm, xmm, r32" xed="VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtu64_ss">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<description>Convert the unsigned 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTUSI2SS" form="xmm, xmm, r64" xed="VCVTUSI2SS_XMMf32_XMMf32_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	dst[i+63:i] := a[i+63:i] / b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPD" form="zmm, zmm, zmm" xed="VDIVPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_div_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	dst[i+63:i] := a[i+63:i] / b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPD" form="zmm, zmm, zmm {er}" xed="VDIVPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPD" form="zmm {k}, zmm, zmm" xed="VDIVPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_div_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPD" form="zmm {k}, zmm, zmm {er}" xed="VDIVPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_div_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPD" form="zmm {z}, zmm, zmm" xed="VDIVPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_div_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] / b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPD" form="zmm {z}, zmm, zmm {er}" xed="VDIVPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := a[i+31:i] / b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPS" form="zmm, zmm, zmm" xed="VDIVPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_div_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := a[i+31:i] / b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPS" form="zmm, zmm, zmm {er}" xed="VDIVPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPS" form="zmm {k}, zmm, zmm" xed="VDIVPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_div_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPS" form="zmm {k}, zmm, zmm {er}" xed="VDIVPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_div_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPS" form="zmm {z}, zmm, zmm" xed="VDIVPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_div_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] / b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDIVPS" form="zmm {z}, zmm, zmm {er}" xed="VDIVPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_div_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+dst[63:0] := a[63:0] / b[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSD" form="xmm, xmm, xmm {er}" xed="VDIVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_div_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". 
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] / b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSD" form="xmm {k}, xmm, xmm {er}" xed="VDIVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_div_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] / b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSD" form="xmm {k}, xmm, xmm" xed="VDIVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_div_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] / b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSD" form="xmm {z}, xmm, xmm {er}" xed="VDIVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_div_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] / b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSD" form="xmm {z}, xmm, xmm" xed="VDIVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_div_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+		[round_note]</description>
+	<operation>
+dst[31:0] := a[31:0] / b[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSS" form="xmm, xmm, xmm {er}" xed="VDIVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_div_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] / b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSS" form="xmm {k}, xmm, xmm {er}" xed="VDIVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_div_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] / b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSS" form="xmm {k}, xmm, xmm" xed="VDIVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_div_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] / b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSS" form="xmm {z}, xmm, xmm {er}" xed="VDIVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_div_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] / b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDIVSS" form="xmm {z}, xmm, xmm" xed="VDIVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="zmm {k}, zmm" xed="VEXPANDPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expandloadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="zmm {k}, m512" xed="VEXPANDPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="zmm {z}, zmm" xed="VEXPANDPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expandloadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPD" form="zmm {z}, m512" xed="VEXPANDPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="zmm {k}, zmm" xed="VEXPANDPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expandloadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="zmm {k}, m512" xed="VEXPANDPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="zmm {z}, zmm" xed="VEXPANDPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expandloadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXPANDPS" form="zmm {z}, m512" xed="VEXPANDPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extractf32x4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[1:0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+2: dst[127:0] := a[383:256]
+3: dst[127:0] := a[511:384]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X4" form="xmm, zmm, imm8" xed="VEXTRACTF32X4_XMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extractf32x4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X4" form="xmm {k}, zmm, imm8" xed="VEXTRACTF32X4_XMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extractf32x4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTF32X4" form="xmm {z}, zmm, imm8" xed="VEXTRACTF32X4_XMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extractf64x4_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[255:0] := a[255:0]
+1: dst[255:0] := a[511:256]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X4" form="ymm, zmm, imm8" xed="VEXTRACTF64X4_YMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extractf64x4_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X4" form="ymm {k}, zmm, imm8" xed="VEXTRACTF64X4_YMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extractf64x4_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTF64X4" form="ymm {z}, zmm, imm8" xed="VEXTRACTF64X4_YMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extracti32x4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[1:0] OF
+0: dst[127:0] := a[127:0]
+1: dst[127:0] := a[255:128]
+2: dst[127:0] := a[383:256]
+3: dst[127:0] := a[511:384]
+ESAC
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X4" form="xmm, zmm, imm8" xed="VEXTRACTI32X4_XMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extracti32x4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X4" form="xmm {k}, zmm, imm8" xed="VEXTRACTI32X4_XMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extracti32x4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract 128 bits (composed of 4 packed 32-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[1:0] OF
+0: tmp[127:0] := a[127:0]
+1: tmp[127:0] := a[255:128]
+2: tmp[127:0] := a[383:256]
+3: tmp[127:0] := a[511:384]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VEXTRACTI32X4" form="xmm {z}, zmm, imm8" xed="VEXTRACTI32X4_XMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_extracti64x4_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+CASE imm8[0] OF
+0: dst[255:0] := a[255:0]
+1: dst[255:0] := a[511:256]
+ESAC
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X4" form="ymm, zmm, imm8" xed="VEXTRACTI64X4_YMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_extracti64x4_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X4" form="ymm {k}, zmm, imm8" xed="VEXTRACTI64X4_YMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_extracti64x4_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract 256 bits (composed of 4 packed 64-bit integers) from "a", selected with "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+CASE imm8[0] OF
+0: tmp[255:0] := a[255:0]
+1: tmp[255:0] := a[511:256]
+ESAC
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VEXTRACTI64X4" form="ymm {z}, zmm, imm8" xed="VEXTRACTI64X4_YMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="zmm, zmm, zmm, imm8" xed="VFIXUPIMMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fixupimm_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="zmm, zmm, zmm, imm8 {sae}" xed="VFIXUPIMMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="zmm {k}, zmm, zmm, imm8" xed="VFIXUPIMMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fixupimm_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="zmm {k}, zmm, zmm, imm8 {sae}" xed="VFIXUPIMMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fixupimm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="zmm {z}, zmm, zmm, imm8" xed="VFIXUPIMMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fixupimm_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up packed double-precision (64-bit) floating-point elements in "a" and "b" using packed 64-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FIXUPIMMPD(a[i+63:i], b[i+63:i], c[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPD" form="zmm {z}, zmm, zmm, imm8 {sae}" xed="VFIXUPIMMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="zmm, zmm, zmm, imm8" xed="VFIXUPIMMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fixupimm_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="zmm, zmm, zmm, imm8 {sae}" xed="VFIXUPIMMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="zmm {k}, zmm, zmm, imm8" xed="VFIXUPIMMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fixupimm_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="zmm {k}, zmm, zmm, imm8 {sae}" xed="VFIXUPIMMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fixupimm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="zmm {z}, zmm, zmm, imm8" xed="VFIXUPIMMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fixupimm_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up packed single-precision (32-bit) floating-point elements in "a" and "b" using packed 32-bit integers in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FIXUPIMMPD(a[i+31:i], b[i+31:i], c[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPIMMPS" form="zmm {z}, zmm, zmm, imm8 {sae}" xed="VFIXUPIMMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fixupimm_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSD" form="xmm, xmm, xmm, imm8 {sae}" xed="VFIXUPIMMSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fixupimm_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSD" form="xmm, xmm, xmm, imm8" xed="VFIXUPIMMSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fixupimm_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+IF k[0]
+	dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSD" form="xmm {k}, xmm, xmm, imm8 {sae}" xed="VFIXUPIMMSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fixupimm_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+IF k[0]
+	dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSD" form="xmm {k}, xmm, xmm, imm8" xed="VFIXUPIMMSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fixupimm_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+IF k[0]
+	dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSD" form="xmm {z}, xmm, xmm, imm8 {sae}" xed="VFIXUPIMMSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fixupimm_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up the lower double-precision (64-bit) floating-point elements in "a" and "b" using the lower 64-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[63:0], src2[63:0], src3[63:0], imm8[7:0]) {
+	tsrc[63:0] := ((src2[62:52] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[63:0]
+	CASE(tsrc[63:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[63:0] := src1[63:0]
+	1 : dest[63:0] := tsrc[63:0]
+	2 : dest[63:0] := QNaN(tsrc[63:0])
+	3 : dest[63:0] := QNAN_Indefinite
+	4 : dest[63:0] := -INF
+	5 : dest[63:0] := +INF
+	6 : dest[63:0] := tsrc.sign? -INF : +INF
+	7 : dest[63:0] := -0
+	8 : dest[63:0] := +0
+	9 : dest[63:0] := -1
+	10: dest[63:0] := +1
+	11: dest[63:0] := 1/2
+	12: dest[63:0] := 90.0
+	13: dest[63:0] := PI/2
+	14: dest[63:0] := MAX_FLOAT
+	15: dest[63:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[63:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[63:0]
+}
+IF k[0]
+	dst[63:0] := FIXUPIMMPD(a[63:0], b[63:0], c[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSD" form="xmm {z}, xmm, xmm, imm8" xed="VFIXUPIMMSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fixupimm_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSS" form="xmm, xmm, xmm, imm8 {sae}" xed="VFIXUPIMMSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fixupimm_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSS" form="xmm, xmm, xmm, imm8" xed="VFIXUPIMMSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fixupimm_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSS" form="xmm {k}, xmm, xmm, imm8 {sae}" xed="VFIXUPIMMSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fixupimm_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSS" form="xmm {k}, xmm, xmm, imm8" xed="VFIXUPIMMSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fixupimm_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". "imm8" is used to set the required flags reporting.
+	[sae_note]</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSS" form="xmm {z}, xmm, xmm, imm8 {sae}" xed="VFIXUPIMMSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fixupimm_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Fix up the lower single-precision (32-bit) floating-point elements in "a" and "b" using the lower 32-bit integer in "c", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". "imm8" is used to set the required flags reporting.</description>
+	<operation>enum TOKEN_TYPE {
+	QNAN_TOKEN := 0, \
+	SNAN_TOKEN := 1, \
+	ZERO_VALUE_TOKEN := 2, \
+	ONE_VALUE_TOKEN := 3, \
+	NEG_INF_TOKEN := 4, \
+	POS_INF_TOKEN := 5, \
+	NEG_VALUE_TOKEN := 6, \
+	POS_VALUE_TOKEN := 7
+}
+DEFINE FIXUPIMMPD(src1[31:0], src2[31:0], src3[31:0], imm8[7:0]) {
+	tsrc[31:0] := ((src2[30:23] == 0) AND (MXCSR.DAZ == 1)) ? 0.0 : src2[31:0]
+	CASE(tsrc[31:0]) OF
+	QNAN_TOKEN:j := 0
+	SNAN_TOKEN:j := 1
+	ZERO_VALUE_TOKEN: j := 2
+	ONE_VALUE_TOKEN: j := 3
+	NEG_INF_TOKEN: j := 4
+	POS_INF_TOKEN: j := 5
+	NEG_VALUE_TOKEN: j := 6
+	POS_VALUE_TOKEN: j := 7
+	ESAC
+	
+	token_response[3:0] := src3[3+4*j:4*j]
+	
+	CASE(token_response[3:0]) OF
+	0 : dest[31:0] := src1[31:0]
+	1 : dest[31:0] := tsrc[31:0]
+	2 : dest[31:0] := QNaN(tsrc[31:0])
+	3 : dest[31:0] := QNAN_Indefinite
+	4 : dest[31:0] := -INF
+	5 : dest[31:0] := +INF
+	6 : dest[31:0] := tsrc.sign? -INF : +INF
+	7 : dest[31:0] := -0
+	8 : dest[31:0] := +0
+	9 : dest[31:0] := -1
+	10: dest[31:0] := +1
+	11: dest[31:0] := 1/2
+	12: dest[31:0] := 90.0
+	13: dest[31:0] := PI/2
+	14: dest[31:0] := MAX_FLOAT
+	15: dest[31:0] := -MAX_FLOAT
+	ESAC
+	
+	CASE(tsrc[31:0]) OF
+	ZERO_VALUE_TOKEN:
+		IF (imm8[0]) #ZE; FI
+	ZERO_VALUE_TOKEN:
+		IF (imm8[1]) #IE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[2]) #ZE; FI
+	ONE_VALUE_TOKEN:
+		IF (imm8[3]) #IE; FI
+	SNAN_TOKEN:
+		IF (imm8[4]) #IE; FI
+	NEG_INF_TOKEN:
+		IF (imm8[5]) #IE; FI
+	NEG_VALUE_TOKEN:
+		IF (imm8[6]) #IE; FI
+	POS_INF_TOKEN:
+		IF (imm8[7]) #IE; FI
+	ESAC
+	RETURN dest[31:0]
+}
+IF k[0]
+	dst[31:0] := FIXUPIMMPD(a[31:0], b[31:0], c[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFIXUPIMMSS" form="xmm {z}, xmm, xmm, imm8" xed="VFIXUPIMMSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm {z}, zmm, zmm" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm {z}, zmm, zmm" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm {z}, zmm, zmm" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm {z}, zmm, zmm {er}" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm {z}, zmm, zmm {er}" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm {z}, zmm, zmm {er}" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm {z}, zmm, zmm" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm {z}, zmm, zmm" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm {z}, zmm, zmm" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm {z}, zmm, zmm {er}" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm {z}, zmm, zmm {er}" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm {z}, zmm, zmm {er}" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm, xmm, xmm {er}" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm, xmm, xmm {er}" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm, xmm, xmm {er}" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm {k}, xmm, xmm {er}" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm {k}, xmm, xmm {er}" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm {k}, xmm, xmm {er}" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm {k}, xmm, xmm" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm {k}, xmm, xmm" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm {k}, xmm, xmm" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm {k}, xmm, xmm {er}" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm {k}, xmm, xmm {er}" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm {k}, xmm, xmm {er}" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm {k}, xmm, xmm" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm {k}, xmm, xmm" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm {k}, xmm, xmm" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm {z}, xmm, xmm {er}" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm {z}, xmm, xmm {er}" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm {z}, xmm, xmm {er}" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm {z}, xmm, xmm" xed="VFMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD213SD" form="xmm {z}, xmm, xmm" xed="VFMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMADD231SD" form="xmm {z}, xmm, xmm" xed="VFMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm {k}, xmm, xmm {er}" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm {k}, xmm, xmm {er}" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm {k}, xmm, xmm {er}" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm {k}, xmm, xmm" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm {k}, xmm, xmm" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm {k}, xmm, xmm" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm, xmm, xmm {er}" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm, xmm, xmm {er}" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm, xmm, xmm {er}" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm {k}, xmm, xmm {er}" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm {k}, xmm, xmm {er}" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm {k}, xmm, xmm {er}" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm {k}, xmm, xmm" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm {k}, xmm, xmm" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm {k}, xmm, xmm" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm {z}, xmm, xmm {er}" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm {z}, xmm, xmm {er}" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm {z}, xmm, xmm {er}" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm {z}, xmm, xmm" xed="VFMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD213SS" form="xmm {z}, xmm, xmm" xed="VFMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMADD231SS" form="xmm {z}, xmm, xmm" xed="VFMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF ((j &amp; 1) == 0)
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm, zmm, zmm" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm, zmm, zmm" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm, zmm, zmm" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmaddsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF ((j &amp; 1) == 0)
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm, zmm, zmm {er}" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm, zmm, zmm {er}" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm, zmm, zmm {er}" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm {k}, zmm, zmm" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm {k}, zmm, zmm" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm {k}, zmm, zmm" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmaddsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE 
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm {k}, zmm, zmm" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm {k}, zmm, zmm" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm {k}, zmm, zmm" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmaddsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm {z}, zmm, zmm" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm {z}, zmm, zmm" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm {z}, zmm, zmm" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmaddsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="zmm {z}, zmm, zmm {er}" xed="VFMADDSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB213PD" form="zmm {z}, zmm, zmm {er}" xed="VFMADDSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADDSUB231PD" form="zmm {z}, zmm, zmm {er}" xed="VFMADDSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF ((j &amp; 1) == 0)
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm, zmm, zmm" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm, zmm, zmm" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm, zmm, zmm" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmaddsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF ((j &amp; 1) == 0)
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm, zmm, zmm {er}" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm, zmm, zmm {er}" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm, zmm, zmm {er}" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm {k}, zmm, zmm" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm {k}, zmm, zmm" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm {k}, zmm, zmm" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmaddsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm {k}, zmm, zmm" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm {k}, zmm, zmm" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm {k}, zmm, zmm" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmaddsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm {z}, zmm, zmm" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm {z}, zmm, zmm" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm {z}, zmm, zmm" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmaddsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="zmm {z}, zmm, zmm {er}" xed="VFMADDSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB213PS" form="zmm {z}, zmm, zmm {er}" xed="VFMADDSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADDSUB231PS" form="zmm {z}, zmm, zmm {er}" xed="VFMADDSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm {z}, zmm, zmm" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm {z}, zmm, zmm" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm {z}, zmm, zmm" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm {z}, zmm, zmm {er}" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm {z}, zmm, zmm {er}" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm {z}, zmm, zmm {er}" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm {z}, zmm, zmm" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm {z}, zmm, zmm" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm {z}, zmm, zmm" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm {z}, zmm, zmm {er}" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm {z}, zmm, zmm {er}" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm {z}, zmm, zmm {er}" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm, xmm, xmm {er}" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm, xmm, xmm {er}" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm, xmm, xmm {er}" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm {k}, xmm, xmm" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm {k}, xmm, xmm" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm {k}, xmm, xmm" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm {k}, xmm, xmm" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm {k}, xmm, xmm" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm {k}, xmm, xmm" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". 
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm {z}, xmm, xmm {er}" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm {z}, xmm, xmm {er}" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm {z}, xmm, xmm {er}" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm {z}, xmm, xmm" xed="VFMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB213SD" form="xmm {z}, xmm, xmm" xed="VFMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFMSUB231SD" form="xmm {z}, xmm, xmm" xed="VFMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm, xmm, xmm {er}" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm, xmm, xmm {er}" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm, xmm, xmm {er}" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm {k}, xmm, xmm" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm {k}, xmm, xmm" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm {k}, xmm, xmm" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm {k}, xmm, xmm {er}" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm {k}, xmm, xmm" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm {k}, xmm, xmm" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm {k}, xmm, xmm" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm {z}, xmm, xmm {er}" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm {z}, xmm, xmm {er}" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm {z}, xmm, xmm {er}" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm {z}, xmm, xmm" xed="VFMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB213SS" form="xmm {z}, xmm, xmm" xed="VFMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFMSUB231SS" form="xmm {z}, xmm, xmm" xed="VFMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF ((j &amp; 1) == 0)
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm, zmm, zmm" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm, zmm, zmm" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm, zmm, zmm" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmsubadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF ((j &amp; 1) == 0)
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm, zmm, zmm {er}" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm, zmm, zmm {er}" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm, zmm, zmm {er}" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm {k}, zmm, zmm" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm {k}, zmm, zmm" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm {k}, zmm, zmm" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmsubadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm {k}, zmm, zmm" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm {k}, zmm, zmm" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm {k}, zmm, zmm" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmsubadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm {z}, zmm, zmm" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm {z}, zmm, zmm" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm {z}, zmm, zmm" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsubadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+		ELSE
+			dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="zmm {z}, zmm, zmm {er}" xed="VFMSUBADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD213PD" form="zmm {z}, zmm, zmm {er}" xed="VFMSUBADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUBADD231PD" form="zmm {z}, zmm, zmm {er}" xed="VFMSUBADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF ((j &amp; 1) == 0)
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm, zmm, zmm" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm, zmm, zmm" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm, zmm, zmm" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_fmsubadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF ((j &amp; 1) == 0)
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm, zmm, zmm {er}" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm, zmm, zmm {er}" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm, zmm, zmm {er}" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm {k}, zmm, zmm" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm {k}, zmm, zmm" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm {k}, zmm, zmm" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask3_fmsubadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm {k}, zmm, zmm" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm {k}, zmm, zmm" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm {k}, zmm, zmm" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_fmsubadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm {z}, zmm, zmm" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm {z}, zmm, zmm" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm {z}, zmm, zmm" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fmsubadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternatively subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF ((j &amp; 1) == 0)
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+		ELSE
+			dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="zmm {z}, zmm, zmm {er}" xed="VFMSUBADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD213PS" form="zmm {z}, zmm, zmm {er}" xed="VFMSUBADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUBADD231PS" form="zmm {z}, zmm, zmm {er}" xed="VFMSUBADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm {z}, zmm, zmm" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm {z}, zmm, zmm" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm {z}, zmm, zmm" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm {z}, zmm, zmm {er}" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm {z}, zmm, zmm {er}" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm {z}, zmm, zmm {er}" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm {z}, zmm, zmm" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm {z}, zmm, zmm" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm {z}, zmm, zmm" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm {z}, zmm, zmm {er}" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm {z}, zmm, zmm {er}" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm {z}, zmm, zmm {er}" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fnmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm, xmm, xmm {er}" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213SD" form="xmm, xmm, xmm {er}" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm, xmm, xmm {er}" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm {k}, xmm, xmm" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213SD" form="xmm {k}, xmm, xmm" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm {k}, xmm, xmm" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm {k}, xmm, xmm" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213SD" form="xmm {k}, xmm, xmm" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm {k}, xmm, xmm" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmadd_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm {z}, xmm, xmm {er}" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD213SD" form="xmm {z}, xmm, xmm {er}" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm {z}, xmm, xmm {er}" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD213SD" form="xmm {z}, xmm, xmm" xed="VFNMADD213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD231SD" form="xmm {z}, xmm, xmm" xed="VFNMADD231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMADD132SD" form="xmm {z}, xmm, xmm" xed="VFNMADD132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fnmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm, xmm, xmm {er}" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm, xmm, xmm {er}" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm, xmm, xmm {er}" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm {k}, xmm, xmm" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm {k}, xmm, xmm" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm {k}, xmm, xmm" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm {k}, xmm, xmm" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm {k}, xmm, xmm" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm {k}, xmm, xmm" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmadd_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". 
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm {z}, xmm, xmm {er}" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm {z}, xmm, xmm {er}" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm {z}, xmm, xmm {er}" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm {z}, xmm, xmm" xed="VFNMADD132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD213SS" form="xmm {z}, xmm, xmm" xed="VFNMADD213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMADD231SS" form="xmm {z}, xmm, xmm" xed="VFNMADD231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm {z}, zmm, zmm" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm {z}, zmm, zmm" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm {z}, zmm, zmm" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm {z}, zmm, zmm {er}" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm {z}, zmm, zmm {er}" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm {z}, zmm, zmm {er}" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm {z}, zmm, zmm" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm {z}, zmm, zmm" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm {z}, zmm, zmm" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_fnmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="const int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm {z}, zmm, zmm {er}" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm {z}, zmm, zmm {er}" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm {z}, zmm, zmm {er}" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fnmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm, xmm, xmm {er}" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm, xmm, xmm {er}" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm, xmm, xmm {er}" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper element from "c" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := c[63:0]
+FI
+dst[127:64] := c[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm {k}, xmm, xmm" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm {k}, xmm, xmm" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm {k}, xmm, xmm" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := a[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm {k}, xmm, xmm" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm {k}, xmm, xmm" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm {k}, xmm, xmm" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmsub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm {z}, xmm, xmm {er}" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm {z}, xmm, xmm {er}" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm {z}, xmm, xmm {er}" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm {z}, xmm, xmm" xed="VFNMSUB132SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB213SD" form="xmm {z}, xmm, xmm" xed="VFNMSUB213SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<instruction name="VFNMSUB231SD" form="xmm {z}, xmm, xmm" xed="VFNMSUB231SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_fnmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", subtract the lower element in "c" from the negated intermediate result, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm, xmm, xmm {er}" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm, xmm, xmm {er}" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm, xmm, xmm {er}" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask3_fnmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "c" when mask bit 0 is not set), and copy the upper 3 packed elements from "c" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := c[31:0]
+FI
+dst[127:32] := c[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm {k}, xmm, xmm" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm {k}, xmm, xmm" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm {k}, xmm, xmm" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm {k}, xmm, xmm {er}" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_fnmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using writemask "k" (the element is copied from "a" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := a[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm {k}, xmm, xmm" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm {k}, xmm, xmm" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm {k}, xmm, xmm" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmsub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm {z}, xmm, xmm {er}" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm {z}, xmm, xmm {er}" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm {z}, xmm, xmm {er}" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_fnmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm {z}, xmm, xmm" xed="VFNMSUB132SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB213SS" form="xmm {z}, xmm, xmm" xed="VFNMSUB213SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<instruction name="VFNMSUB231SS" form="xmm {z}, xmm, xmm" xed="VFNMSUB231SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="zmm, vm32y" xed="VGATHERDPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="zmm {k}, vm32y" xed="VGATHERDPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="zmm, vm64z" xed="VGATHERQPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERQPD" form="zmm {k}, vm64z" xed="VGATHERQPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="ymm, vm64z" xed="VGATHERQPS_YMMf32_MASKmskw_MEMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGATHERQPS" form="ymm {k}, vm64z" xed="VGATHERQPS_YMMf32_MASKmskw_MEMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="zmm {z}, zmm" xed="VGETEXPPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getexp_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
+	[sae_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="zmm {z}, zmm {sae}" xed="VGETEXPPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="zmm {z}, zmm" xed="VGETEXPPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getexp_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
+	[sae_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="zmm {z}, zmm {sae}" xed="VGETEXPPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getexp_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.
+	[sae_note]</description>
+	<operation>dst[63:0] := ConvertExpFP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSD" form="xmm, xmm, xmm {sae}" xed="VGETEXPSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getexp_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.</description>
+	<operation>dst[63:0] := ConvertExpFP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSD" form="xmm, xmm, xmm" xed="VGETEXPSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getexp_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.
+	[sae_note]</description>
+	<operation>IF k[0]
+	dst[63:0] := ConvertExpFP64(b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSD" form="xmm {k}, xmm, xmm {sae}" xed="VGETEXPSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getexp_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.</description>
+	<operation>IF k[0]
+	dst[63:0] := ConvertExpFP64(b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSD" form="xmm {k}, xmm, xmm" xed="VGETEXPSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getexp_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.
+	[sae_note]</description>
+	<operation>IF k[0]
+	dst[63:0] := ConvertExpFP64(b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSD" form="xmm {z}, xmm, xmm {sae}" xed="VGETEXPSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getexp_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Convert the exponent of the lower double-precision (64-bit) floating-point element in "b" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.</description>
+	<operation>IF k[0]
+	dst[63:0] := ConvertExpFP64(b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSD" form="xmm {z}, xmm, xmm" xed="VGETEXPSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getexp_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.
+	[sae_note]</description>
+	<operation>dst[31:0] := ConvertExpFP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSS" form="xmm, xmm, xmm {sae}" xed="VGETEXPSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getexp_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.</description>
+	<operation>dst[31:0] := ConvertExpFP32(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSS" form="xmm, xmm, xmm" xed="VGETEXPSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getexp_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.
+	[sae_note]</description>
+	<operation>IF k[0]
+	dst[31:0] := ConvertExpFP32(b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSS" form="xmm {k}, xmm, xmm {sae}" xed="VGETEXPSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getexp_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.</description>
+	<operation>IF k[0]
+	dst[31:0] := ConvertExpFP32(b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSS" form="xmm {k}, xmm, xmm" xed="VGETEXPSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getexp_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.
+	[sae_note]</description>
+	<operation>IF k[0]
+	dst[31:0] := ConvertExpFP32(b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSS" form="xmm {z}, xmm, xmm {sae}" xed="VGETEXPSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getexp_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert the exponent of the lower single-precision (32-bit) floating-point element in "b" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "floor(log2(x))" for the lower element.</description>
+	<operation>IF k[0]
+	dst[31:0] := ConvertExpFP32(b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETEXPSS" form="xmm {z}, xmm, xmm" xed="VGETEXPSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="zmm {z}, zmm, imm8" xed="VGETMANTPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getmant_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="zmm {z}, zmm, imm8 {sae}" xed="VGETMANTPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="zmm {z}, zmm, imm8" xed="VGETMANTPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_getmant_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="zmm {z}, zmm, imm8 {sae}" xed="VGETMANTPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getmant_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSD" form="xmm, xmm, xmm, imm8 {sae}" xed="VGETMANTSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getmant_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSD" form="xmm, xmm, xmm, imm8" xed="VGETMANTSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getmant_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the writemask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSD" form="xmm {k}, xmm, xmm, imm8 {sae}" xed="VGETMANTSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getmant_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the writemask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSD" form="xmm {k}, xmm, xmm, imm8" xed="VGETMANTSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getmant_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSD" form="xmm {z}, xmm, xmm, imm8 {sae}" xed="VGETMANTSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getmant_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissa of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>IF k[0]
+	dst[63:0] := GetNormalizedMantissa(b[63:0], sc, interv)
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSD" form="xmm {z}, xmm, xmm, imm8" xed="VGETMANTSD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getmant_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSS" form="xmm, xmm, xmm, imm8 {sae}" xed="VGETMANTSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_getmant_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSS" form="xmm, xmm, xmm, imm8" xed="VGETMANTSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getmant_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the writemask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSS" form="xmm {k}, xmm, xmm, imm8 {sae}" xed="VGETMANTSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_getmant_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the writemask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSS" form="xmm {k}, xmm, xmm, imm8" xed="VGETMANTSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getmant_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSS" form="xmm {z}, xmm, xmm, imm8 {sae}" xed="VGETMANTSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_getmant_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissa of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where the exponent "k" (unrelated to the zeromask "k") depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>IF k[0]
+	dst[31:0] := GetNormalizedMantissa(b[31:0], sc, interv)
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGETMANTSS" form="xmm {z}, xmm, xmm, imm8" xed="VGETMANTSS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_insertf32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+2: dst[383:256] := b[127:0]
+3: dst[511:384] := b[127:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF32X4" form="zmm, zmm, xmm, imm8" xed="VINSERTF32X4_ZMMf32_MASKmskw_ZMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_insertf32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF32X4" form="zmm {k}, zmm, xmm, imm8" xed="VINSERTF32X4_ZMMf32_MASKmskw_ZMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_insertf32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF32X4" form="zmm {z}, zmm, xmm, imm8" xed="VINSERTF32X4_ZMMf32_MASKmskw_ZMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_insertf64x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: dst[255:0] := b[255:0]
+1: dst[511:256] := b[255:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF64X4" form="zmm, zmm, ymm, imm8" xed="VINSERTF64X4_ZMMf64_MASKmskw_ZMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_insertf64x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF64X4" form="zmm {k}, zmm, ymm, imm8" xed="VINSERTF64X4_ZMMf64_MASKmskw_ZMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_insertf64x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTF64X4" form="zmm {z}, zmm, ymm, imm8" xed="VINSERTF64X4_ZMMf64_MASKmskw_ZMMf64_YMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_inserti32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: dst[127:0] := b[127:0]
+1: dst[255:128] := b[127:0]
+2: dst[383:256] := b[127:0]
+3: dst[511:384] := b[127:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI32X4" form="zmm, zmm, xmm, imm8" xed="VINSERTI32X4_ZMMu32_MASKmskw_ZMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_inserti32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI32X4" form="zmm {k}, zmm, xmm, imm8" xed="VINSERTI32X4_ZMMu32_MASKmskw_ZMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_inserti32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "tmp", then insert 128 bits (composed of 4 packed 32-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[1:0]) OF
+0: tmp[127:0] := b[127:0]
+1: tmp[255:128] := b[127:0]
+2: tmp[383:256] := b[127:0]
+3: tmp[511:384] := b[127:0]
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI32X4" form="zmm {z}, zmm, xmm, imm8" xed="VINSERTI32X4_ZMMu32_MASKmskw_ZMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_inserti64x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: dst[255:0] := b[255:0]
+1: dst[511:256] := b[255:0]
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI64X4" form="zmm, zmm, ymm, imm8" xed="VINSERTI64X4_ZMMu64_MASKmskw_ZMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_inserti64x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI64X4" form="zmm {k}, zmm, ymm, imm8" xed="VINSERTI64X4_ZMMu64_MASKmskw_ZMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_inserti64x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "tmp", then insert 256 bits (composed of 4 packed 64-bit integers) from "b" into "tmp" at the location specified by "imm8".  Store "tmp" to "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[511:0] := a[511:0]
+CASE (imm8[0]) OF
+0: tmp[255:0] := b[255:0]
+1: tmp[511:256] := b[255:0]
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VINSERTI64X4" form="zmm {z}, zmm, ymm, imm8" xed="VINSERTI64X4_ZMMu64_MASKmskw_ZMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPD" form="zmm {k}, zmm, zmm" xed="VMAXPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).   [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPD" form="zmm {k}, zmm, zmm {sae}" xed="VMAXPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPD" form="zmm {z}, zmm, zmm" xed="VMAXPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPD" form="zmm {z}, zmm, zmm {sae}" xed="VMAXPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPD" form="zmm, zmm, zmm" xed="VMAXPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".  [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPD" form="zmm, zmm, zmm {sae}" xed="VMAXPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPS" form="zmm {k}, zmm, zmm" xed="VMAXPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).   [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPS" form="zmm {k}, zmm, zmm {sae}" xed="VMAXPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPS" form="zmm {z}, zmm, zmm" xed="VMAXPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPS" form="zmm {z}, zmm, zmm {sae}" xed="VMAXPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPS" form="zmm, zmm, zmm" xed="VMAXPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".  [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMAXPS" form="zmm, zmm, zmm {sae}" xed="VMAXPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MAX(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSD" form="xmm {k}, xmm, xmm {sae}" xed="VMAXSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MAX(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSD" form="xmm {k}, xmm, xmm" xed="VMAXSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MAX(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSD" form="xmm {z}, xmm, xmm {sae}" xed="VMAXSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MAX(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSD" form="xmm {z}, xmm, xmm" xed="VMAXSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_max_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note]</description>
+	<operation>
+dst[63:0] := MAX(a[63:0], b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSD" form="xmm, xmm, xmm {sae}" xed="VMAXSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MAX(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSS" form="xmm {k}, xmm, xmm {sae}" xed="VMAXSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_max_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MAX(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSS" form="xmm {k}, xmm, xmm" xed="VMAXSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MAX(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSS" form="xmm {z}, xmm, xmm {sae}" xed="VMAXSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_max_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MAX(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSS" form="xmm {z}, xmm, xmm" xed="VMAXSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_max_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note]</description>
+	<operation>
+dst[31:0] := MAX(a[31:0], b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMAXSS" form="xmm, xmm, xmm {sae}" xed="VMAXSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPD" form="zmm {k}, zmm, zmm" xed="VMINPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPD" form="zmm {k}, zmm, zmm {sae}" xed="VMINPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPD" form="zmm {z}, zmm, zmm" xed="VMINPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPD" form="zmm {z}, zmm, zmm {sae}" xed="VMINPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPD" form="zmm, zmm, zmm" xed="VMINPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPD" form="zmm, zmm, zmm {sae}" xed="VMINPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPS" form="zmm {k}, zmm, zmm" xed="VMINPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPS" form="zmm {k}, zmm, zmm {sae}" xed="VMINPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPS" form="zmm {z}, zmm, zmm" xed="VMINPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPS" form="zmm {z}, zmm, zmm {sae}" xed="VMINPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPS" form="zmm, zmm, zmm" xed="VMINPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [sae_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMINPS" form="zmm, zmm, zmm {sae}" xed="VMINPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MIN(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSD" form="xmm {k}, xmm, xmm {sae}" xed="VMINSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MIN(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSD" form="xmm {k}, xmm, xmm" xed="VMINSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MIN(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSD" form="xmm {z}, xmm, xmm {sae}" xed="VMINSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MIN(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSD" form="xmm {z}, xmm, xmm" xed="VMINSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_min_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [sae_note]</description>
+	<operation>
+dst[63:0] := MIN(a[63:0], b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSD" form="xmm, xmm, xmm {sae}" xed="VMINSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MIN(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSS" form="xmm {k}, xmm, xmm {sae}" xed="VMINSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_min_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MIN(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSS" form="xmm {k}, xmm, xmm" xed="VMINSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MIN(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSS" form="xmm {z}, xmm, xmm {sae}" xed="VMINSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_min_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MIN(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSS" form="xmm {z}, xmm, xmm" xed="VMINSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_min_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [sae_note]</description>
+	<operation>
+dst[31:0] := MIN(a[31:0], b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMINSS" form="xmm, xmm, xmm {sae}" xed="VMINSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm {z}, m512" xed="VMOVAPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mov_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Move packed double-precision (64-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm {z}, zmm" xed="VMOVAPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm {z}, m512" xed="VMOVAPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mov_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Move packed single-precision (32-bit) floating-point elements from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm {z}, zmm" xed="VMOVAPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[63:0] := a[63:0]
+tmp[127:64] := a[63:0]
+tmp[191:128] := a[191:128]
+tmp[255:192] := a[191:128]
+tmp[319:256] := a[319:256] 
+tmp[383:320] := a[319:256] 
+tmp[447:384] := a[447:384]
+tmp[511:448] := a[447:384]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="zmm {k}, zmm" xed="VMOVDDUP_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[63:0] := a[63:0]
+tmp[127:64] := a[63:0]
+tmp[191:128] := a[191:128]
+tmp[255:192] := a[191:128]
+tmp[319:256] := a[319:256] 
+tmp[383:320] := a[319:256] 
+tmp[447:384] := a[447:384]
+tmp[511:448] := a[447:384]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="zmm {z}, zmm" xed="VMOVDDUP_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Duplicate even-indexed double-precision (64-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := a[63:0]
+dst[191:128] := a[191:128]
+dst[255:192] := a[191:128]
+dst[319:256] := a[319:256]
+dst[383:320] := a[319:256]
+dst[447:384] := a[447:384]
+dst[511:448] := a[447:384]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDDUP" form="zmm, zmm" xed="VMOVDDUP_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm {z}, m512" xed="VMOVDQA32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mov_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Move packed 32-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm {z}, zmm" xed="VMOVDQA32_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm {z}, m512" xed="VMOVDQA64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mov_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Move packed 64-bit integers from "a" into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm {z}, zmm" xed="VMOVDQA64_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load 512-bits of integer data from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="zmm, m512" xed="VMOVDQU32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="zmm {k}, m512" xed="VMOVDQU32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU32" form="m512 {k}, zmm" xed="VMOVDQU32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_loadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load packed 32-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU32" form="zmm {z}, m512" xed="VMOVDQU32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="M512" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<description>Store 512-bits of integer data from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQU32" form="m512, zmm" xed="VMOVDQU32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="zmm {k}, m512" xed="VMOVDQU64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_storeu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQU64" form="m512 {k}, zmm" xed="VMOVDQU64_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_loadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load packed 64-bit integers from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQU64" form="zmm {z}, m512" xed="VMOVDQU64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_stream_load_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="void const*" varname="mem_addr" etype="M512" memwidth="512"/>
+	<description>Load 512-bits of integer data from memory into "dst" using a non-temporal memory hint. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVNTDQA" form="zmm, m512" xed="VMOVNTDQA_ZMMu32_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_stream_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="M512" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<description>Store 512-bits of integer data from "a" into memory using a non-temporal memory hint. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVNTDQ" form="m512, zmm" xed="VMOVNTDQ_MEMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_stream_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVNTPD" form="m512, zmm" xed="VMOVNTPD_MEMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_stream_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVNTPS" form="m512, zmm" xed="VMOVNTPS_MEMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_load_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MEM[mem_addr+63:mem_addr]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VMOVSD" form="xmm {k}, m64" xed="VMOVSD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_move_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSD" form="xmm {k}, xmm, xmm" xed="VMOVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_store_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store the lower double-precision (64-bit) floating-point element from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+IF k[0]
+	MEM[mem_addr+63:mem_addr] := a[63:0]
+FI
+	</operation>
+	<instruction name="VMOVSD" form="m64 {k}, xmm" xed="VMOVSD_MEMf64_MASKmskw_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_load_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper element of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := MEM[mem_addr+63:mem_addr]
+ELSE
+	dst[63:0] := 0
+FI
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VMOVSD" form="xmm {z}, m64" xed="VMOVSD_XMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_move_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSD" form="xmm {z}, xmm, xmm" xed="VMOVSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[63:32] 
+tmp[63:32] := a[63:32] 
+tmp[95:64] := a[127:96] 
+tmp[127:96] := a[127:96]
+tmp[159:128] := a[191:160] 
+tmp[191:160] := a[191:160] 
+tmp[223:192] := a[255:224] 
+tmp[255:224] := a[255:224]
+tmp[287:256] := a[319:288] 
+tmp[319:288] := a[319:288] 
+tmp[351:320] := a[383:352] 
+tmp[383:352] := a[383:352] 
+tmp[415:384] := a[447:416] 
+tmp[447:416] := a[447:416] 
+tmp[479:448] := a[511:480]
+tmp[511:480] := a[511:480]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="zmm {k}, zmm" xed="VMOVSHDUP_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[63:32] 
+tmp[63:32] := a[63:32] 
+tmp[95:64] := a[127:96] 
+tmp[127:96] := a[127:96]
+tmp[159:128] := a[191:160] 
+tmp[191:160] := a[191:160] 
+tmp[223:192] := a[255:224] 
+tmp[255:224] := a[255:224]
+tmp[287:256] := a[319:288] 
+tmp[319:288] := a[319:288] 
+tmp[351:320] := a[383:352] 
+tmp[383:352] := a[383:352] 
+tmp[415:384] := a[447:416] 
+tmp[447:416] := a[447:416] 
+tmp[479:448] := a[511:480]
+tmp[511:480] := a[511:480]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="zmm {z}, zmm" xed="VMOVSHDUP_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] 
+dst[63:32] := a[63:32] 
+dst[95:64] := a[127:96] 
+dst[127:96] := a[127:96]
+dst[159:128] := a[191:160] 
+dst[191:160] := a[191:160] 
+dst[223:192] := a[255:224] 
+dst[255:224] := a[255:224]
+dst[287:256] := a[319:288] 
+dst[319:288] := a[319:288] 
+dst[351:320] := a[383:352] 
+dst[383:352] := a[383:352] 
+dst[415:384] := a[447:416] 
+dst[447:416] := a[447:416] 
+dst[479:448] := a[511:480]
+dst[511:480] := a[511:480]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVSHDUP" form="zmm, zmm" xed="VMOVSHDUP_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[31:0] 
+tmp[63:32] := a[31:0] 
+tmp[95:64] := a[95:64] 
+tmp[127:96] := a[95:64]
+tmp[159:128] := a[159:128] 
+tmp[191:160] := a[159:128] 
+tmp[223:192] := a[223:192] 
+tmp[255:224] := a[223:192]
+tmp[287:256] := a[287:256] 
+tmp[319:288] := a[287:256] 
+tmp[351:320] := a[351:320] 
+tmp[383:352] := a[351:320] 
+tmp[415:384] := a[415:384] 
+tmp[447:416] := a[415:384] 
+tmp[479:448] := a[479:448]
+tmp[511:480] := a[479:448]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="zmm {k}, zmm" xed="VMOVSLDUP_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp[31:0] := a[31:0] 
+tmp[63:32] := a[31:0] 
+tmp[95:64] := a[95:64] 
+tmp[127:96] := a[95:64]
+tmp[159:128] := a[159:128] 
+tmp[191:160] := a[159:128] 
+tmp[223:192] := a[223:192] 
+tmp[255:224] := a[223:192]
+tmp[287:256] := a[287:256] 
+tmp[319:288] := a[287:256] 
+tmp[351:320] := a[351:320] 
+tmp[383:352] := a[351:320] 
+tmp[415:384] := a[415:384] 
+tmp[447:416] := a[415:384] 
+tmp[479:448] := a[479:448]
+tmp[511:480] := a[479:448]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="zmm {z}, zmm" xed="VMOVSLDUP_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] 
+dst[63:32] := a[31:0] 
+dst[95:64] := a[95:64] 
+dst[127:96] := a[95:64]
+dst[159:128] := a[159:128] 
+dst[191:160] := a[159:128] 
+dst[223:192] := a[223:192] 
+dst[255:224] := a[223:192]
+dst[287:256] := a[287:256] 
+dst[319:288] := a[287:256] 
+dst[351:320] := a[351:320] 
+dst[383:352] := a[351:320] 
+dst[415:384] := a[415:384] 
+dst[447:416] := a[415:384] 
+dst[479:448] := a[479:448]
+dst[511:480] := a[479:448]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVSLDUP" form="zmm, zmm" xed="VMOVSLDUP_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_load_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const float*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MEM[mem_addr+31:mem_addr]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VMOVSS" form="xmm {k}, m32" xed="VMOVSS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_move_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSS" form="xmm {k}, xmm, xmm" xed="VMOVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_store_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store the lower single-precision (32-bit) floating-point element from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+IF k[0]
+	MEM[mem_addr+31:mem_addr] := a[31:0]
+FI
+	</operation>
+	<instruction name="VMOVSS" form="m32 {k}, xmm" xed="VMOVSS_MEMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_load_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const float*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of "dst" to zero. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := MEM[mem_addr+31:mem_addr]
+ELSE
+	dst[31:0] := 0
+FI
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VMOVSS" form="xmm {z}, m32" xed="VMOVSS_XMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_move_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMOVSS" form="xmm {z}, xmm, xmm" xed="VMOVSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="zmm, m512" xed="VMOVUPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="zmm {k}, m512" xed="VMOVUPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_storeu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVUPD" form="m512 {k}, zmm" xed="VMOVUPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVUPD" form="zmm {z}, m512" xed="VMOVUPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory. 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVUPD" form="m512, zmm" xed="VMOVUPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="zmm, m512" xed="VMOVUPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="zmm {k}, m512" xed="VMOVUPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_storeu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVUPS" form="m512 {k}, zmm" xed="VMOVUPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVUPS" form="zmm {z}, m512" xed="VMOVUPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_storeu_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. 
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVUPS" form="m512, zmm" xed="VMOVUPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPD" form="zmm {z}, zmm, zmm" xed="VMULPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mul_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPD" form="zmm {z}, zmm, zmm {er}" xed="VMULPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPS" form="zmm {z}, zmm, zmm" xed="VMULPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mul_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPS" form="zmm {z}, zmm, zmm {er}" xed="VMULPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] * b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSD" form="xmm {k}, xmm, xmm {er}" xed="VMULSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] * b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSD" form="xmm {k}, xmm, xmm" xed="VMULSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] * b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSD" form="xmm {z}, xmm, xmm {er}" xed="VMULSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] * b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSD" form="xmm {z}, xmm, xmm" xed="VMULSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mul_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+		[round_note]</description>
+	<operation>
+dst[63:0] := a[63:0] * b[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSD" form="xmm, xmm, xmm {er}" xed="VMULSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] * b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSS" form="xmm {k}, xmm, xmm {er}" xed="VMULSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_mul_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] * b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSS" form="xmm {k}, xmm, xmm" xed="VMULSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+		[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] * b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSS" form="xmm {z}, xmm, xmm {er}" xed="VMULSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_mul_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] * b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSS" form="xmm {z}, xmm, xmm" xed="VMULSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mul_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+		[round_note]</description>
+	<operation>
+dst[31:0] := a[31:0] * b[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VMULSS" form="xmm, xmm, xmm {er}" xed="VMULSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ABS(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSD" form="zmm, zmm" xed="VPABSD_ZMMi32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSD" form="zmm {k}, zmm" xed="VPABSD_ZMMi32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_abs_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSD" form="zmm {z}, zmm" xed="VPABSD_ZMMi32_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ABS(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSQ" form="zmm, zmm" xed="VPABSQ_ZMMi64_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSQ" form="zmm {k}, zmm" xed="VPABSQ_ZMMi64_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_abs_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Compute the absolute value of packed signed 64-bit integers in "a", and store the unsigned results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPABSQ" form="zmm {z}, zmm" xed="VPABSQ_ZMMi64_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDD" form="zmm {z}, zmm, zmm" xed="VPADDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDQ" form="zmm, zmm, zmm" xed="VPADDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDQ" form="zmm {k}, zmm, zmm" xed="VPADDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDQ" form="zmm {z}, zmm, zmm" xed="VPADDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDD" form="zmm {z}, zmm, zmm" xed="VPANDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDND" form="zmm {z}, zmm, zmm" xed="VPANDND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (NOT a[i+63:i]) AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="zmm {z}, zmm, zmm" xed="VPANDNQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDQ" form="zmm {z}, zmm, zmm" xed="VPANDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set1_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTB" form="zmm, r8" xed="VPBROADCASTB_ZMMu8_MASKmskw_GPR32u8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="zmm, xmm" xed="VPBROADCASTD_ZMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="zmm {k}, xmm" xed="VPBROADCASTD_ZMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="zmm {k}, r32" xed="VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcastd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Broadcast the low packed 32-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="zmm {z}, xmm" xed="VPBROADCASTD_ZMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[31:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="zmm {z}, r32" xed="VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_set1_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTD" form="zmm, r32" xed="VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="zmm, xmm" xed="VPBROADCASTQ_ZMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="zmm {k}, xmm" xed="VPBROADCASTQ_ZMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="zmm {k}, r64" xed="VPBROADCASTQ_ZMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_broadcastq_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Broadcast the low packed 64-bit integer from "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="zmm {z}, xmm" xed="VPBROADCASTQ_ZMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[63:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="zmm {z}, r64" xed="VPBROADCASTQ_ZMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_set1_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTQ" form="zmm, r64" xed="VPBROADCASTQ_ZMMu64_MASKmskw_GPR64u64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set1_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast the low packed 16-bit integer from "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBROADCASTW" form="zmm, r16" xed="VPBROADCASTW_ZMMu16_MASKmskw_GPR32u16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmp_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpeq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPEQQ" form="k, zmm, zmm" xed="VPCMPEQQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpge_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpgt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPGTQ" form="k, zmm, zmm" xed="VPCMPGTQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmple_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpneq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmp_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpeq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPEQQ" form="k {k}, zmm, zmm" xed="VPCMPEQQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpge_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpgt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPGTQ" form="k {k}, zmm, zmm" xed="VPCMPGTQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmple_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpneq_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPQ_MASKmskw_MASKmskw_ZMMi64_ZMMi64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmp_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpeq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpge_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpgt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmple_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmplt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cmpneq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmp_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpeq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] == b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpge_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpgt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &gt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmple_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt;= b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmplt_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] &lt; b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cmpneq_epu64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] != b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPCMPUQ" form="k {k}, zmm, zmm, imm8" xed="VPCMPUQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compress_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := src[511:m]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSD" form="zmm {k}, zmm" xed="VPCOMPRESSD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compressstoreu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 32
+m := base_addr
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		MEM[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSD" form="m32 {k}, zmm" xed="VPCOMPRESSD_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_compress_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Contiguously store the active 32-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 32
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[m+size-1:m] := a[i+31:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := 0
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSD" form="zmm {z}, zmm" xed="VPCOMPRESSD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compress_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := src[511:m]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="zmm {k}, zmm" xed="VPCOMPRESSQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compressstoreu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 64
+m := base_addr
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		MEM[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="m64 {k}, zmm" xed="VPCOMPRESSQ_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_compress_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Contiguously store the active 64-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 64
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[m+size-1:m] := a[i+63:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := 0
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSQ" form="zmm {z}, zmm" xed="VPCOMPRESSQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutexvar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMD" form="zmm {k}, zmm, zmm" xed="VPERMD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutexvar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMD" form="zmm {z}, zmm, zmm" xed="VPERMD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutexvar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMD" form="zmm, zmm, zmm" xed="VPERMD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := idx[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="zmm {k}, zmm, zmm" xed="VPERMI2D_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMT2D" form="zmm {k}, zmm, zmm" xed="VPERMT2D_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="zmm {z}, zmm, zmm" xed="VPERMI2D_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<instruction name="VPERMT2D" form="zmm {z}, zmm, zmm" xed="VPERMT2D_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex2var_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2D" form="zmm, zmm, zmm" xed="VPERMI2D_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<instruction name="VPERMT2D" form="zmm, zmm, zmm" xed="VPERMT2D_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := idx[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="zmm {k}, zmm, zmm" xed="VPERMI2PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMT2PD" form="zmm {k}, zmm, zmm" xed="VPERMT2PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="zmm {z}, zmm, zmm" xed="VPERMI2PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VPERMT2PD" form="zmm {z}, zmm, zmm" xed="VPERMT2PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex2var_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2PD" form="zmm, zmm, zmm" xed="VPERMI2PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VPERMT2PD" form="zmm, zmm, zmm" xed="VPERMT2PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := idx[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="zmm {k}, zmm, zmm" xed="VPERMI2PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMT2PS" form="zmm {k}, zmm, zmm" xed="VPERMT2PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := (idx[i+4]) ? b[off+31:off] : a[off+31:off]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="zmm {z}, zmm, zmm" xed="VPERMI2PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VPERMT2PS" form="zmm {z}, zmm, zmm" xed="VPERMT2PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex2var_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	off := idx[i+3:i]*32
+	dst[i+31:i] := idx[i+4] ? b[off+31:off] : a[off+31:off]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2PS" form="zmm, zmm, zmm" xed="VPERMI2PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VPERMT2PS" form="zmm, zmm, zmm" xed="VPERMT2PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := idx[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="zmm {k}, zmm, zmm" xed="VPERMI2Q_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMT2Q" form="zmm {k}, zmm, zmm" xed="VPERMT2Q_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := (idx[i+3]) ? b[off+63:off] : a[off+63:off]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="zmm {z}, zmm, zmm" xed="VPERMI2Q_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<instruction name="VPERMT2Q" form="zmm {z}, zmm, zmm" xed="VPERMT2Q_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex2var_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	off := idx[i+2:i]*64
+	dst[i+63:i] := idx[i+3] ? b[off+63:off] : a[off+63:off]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2Q" form="zmm, zmm, zmm" xed="VPERMI2Q_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<instruction name="VPERMT2Q" form="zmm, zmm, zmm" xed="VPERMT2Q_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI
+IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI
+IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI
+IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI
+IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI
+IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI
+IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI
+IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI
+IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="zmm {k}, zmm, imm8" xed="VPERMILPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI
+IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI
+IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI
+IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI
+IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI
+IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI
+IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI
+IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI
+IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="zmm {k}, zmm, zmm" xed="VPERMILPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+IF (imm8[0] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (imm8[2] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (imm8[2] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (imm8[3] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (imm8[3] == 1) tmp_dst[255:192] := a[255:192]; FI
+IF (imm8[4] == 0) tmp_dst[319:256] := a[319:256]; FI
+IF (imm8[4] == 1) tmp_dst[319:256] := a[383:320]; FI
+IF (imm8[5] == 0) tmp_dst[383:320] := a[319:256]; FI
+IF (imm8[5] == 1) tmp_dst[383:320] := a[383:320]; FI
+IF (imm8[6] == 0) tmp_dst[447:384] := a[447:384]; FI
+IF (imm8[6] == 1) tmp_dst[447:384] := a[511:448]; FI
+IF (imm8[7] == 0) tmp_dst[511:448] := a[447:384]; FI
+IF (imm8[7] == 1) tmp_dst[511:448] := a[511:448]; FI
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="zmm {z}, zmm, imm8" xed="VPERMILPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+IF (b[1] == 0) tmp_dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) tmp_dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) tmp_dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) tmp_dst[127:64] := a[127:64]; FI
+IF (b[129] == 0) tmp_dst[191:128] := a[191:128]; FI
+IF (b[129] == 1) tmp_dst[191:128] := a[255:192]; FI
+IF (b[193] == 0) tmp_dst[255:192] := a[191:128]; FI
+IF (b[193] == 1) tmp_dst[255:192] := a[255:192]; FI
+IF (b[257] == 0) tmp_dst[319:256] := a[319:256]; FI
+IF (b[257] == 1) tmp_dst[319:256] := a[383:320]; FI
+IF (b[321] == 0) tmp_dst[383:320] := a[319:256]; FI
+IF (b[321] == 1) tmp_dst[383:320] := a[383:320]; FI
+IF (b[385] == 0) tmp_dst[447:384] := a[447:384]; FI
+IF (b[385] == 1) tmp_dst[447:384] := a[511:448]; FI
+IF (b[449] == 0) tmp_dst[511:448] := a[447:384]; FI
+IF (b[449] == 1) tmp_dst[511:448] := a[511:448]; FI
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="zmm {z}, zmm, zmm" xed="VPERMILPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permute_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI
+IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI
+IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI
+IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI
+IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI
+IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI
+IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI
+IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI
+IF (imm8[4] == 0) dst[319:256] := a[319:256]; FI
+IF (imm8[4] == 1) dst[319:256] := a[383:320]; FI
+IF (imm8[5] == 0) dst[383:320] := a[319:256]; FI
+IF (imm8[5] == 1) dst[383:320] := a[383:320]; FI
+IF (imm8[6] == 0) dst[447:384] := a[447:384]; FI
+IF (imm8[6] == 1) dst[447:384] := a[511:448]; FI
+IF (imm8[7] == 0) dst[511:448] := a[447:384]; FI
+IF (imm8[7] == 1) dst[511:448] := a[511:448]; FI
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="zmm, zmm, imm8" xed="VPERMILPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutevar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst".</description>
+	<operation>
+IF (b[1] == 0) dst[63:0] := a[63:0]; FI
+IF (b[1] == 1) dst[63:0] := a[127:64]; FI
+IF (b[65] == 0) dst[127:64] := a[63:0]; FI
+IF (b[65] == 1) dst[127:64] := a[127:64]; FI
+IF (b[129] == 0) dst[191:128] := a[191:128]; FI
+IF (b[129] == 1) dst[191:128] := a[255:192]; FI
+IF (b[193] == 0) dst[255:192] := a[191:128]; FI
+IF (b[193] == 1) dst[255:192] := a[255:192]; FI
+IF (b[257] == 0) dst[319:256] := a[319:256]; FI
+IF (b[257] == 1) dst[319:256] := a[383:320]; FI
+IF (b[321] == 0) dst[383:320] := a[319:256]; FI
+IF (b[321] == 1) dst[383:320] := a[383:320]; FI
+IF (b[385] == 0) dst[447:384] := a[447:384]; FI
+IF (b[385] == 1) dst[447:384] := a[511:448]; FI
+IF (b[449] == 0) dst[511:448] := a[447:384]; FI
+IF (b[449] == 1) dst[511:448] := a[511:448]; FI
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPD" form="zmm, zmm, zmm" xed="VPERMILPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4])
+tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6])
+tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4])
+tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="zmm {k}, zmm, imm8" xed="VPERMILPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], b[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], b[33:32])
+tmp_dst[95:64] := SELECT4(a[127:0], b[65:64])
+tmp_dst[127:96] := SELECT4(a[127:0], b[97:96])
+tmp_dst[159:128] := SELECT4(a[255:128], b[129:128])
+tmp_dst[191:160] := SELECT4(a[255:128], b[161:160])
+tmp_dst[223:192] := SELECT4(a[255:128], b[193:192])
+tmp_dst[255:224] := SELECT4(a[255:128], b[225:224])
+tmp_dst[287:256] := SELECT4(a[383:256], b[257:256])
+tmp_dst[319:288] := SELECT4(a[383:256], b[289:288])
+tmp_dst[351:320] := SELECT4(a[383:256], b[321:320])
+tmp_dst[383:352] := SELECT4(a[383:256], b[353:352])
+tmp_dst[415:384] := SELECT4(a[511:384], b[385:384])
+tmp_dst[447:416] := SELECT4(a[511:384], b[417:416])
+tmp_dst[479:448] := SELECT4(a[511:384], b[449:448])
+tmp_dst[511:480] := SELECT4(a[511:384], b[481:480])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="zmm {k}, zmm, zmm" xed="VPERMILPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4])
+tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6])
+tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4])
+tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="zmm {z}, zmm, imm8" xed="VPERMILPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], b[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], b[33:32])
+tmp_dst[95:64] := SELECT4(a[127:0], b[65:64])
+tmp_dst[127:96] := SELECT4(a[127:0], b[97:96])
+tmp_dst[159:128] := SELECT4(a[255:128], b[129:128])
+tmp_dst[191:160] := SELECT4(a[255:128], b[161:160])
+tmp_dst[223:192] := SELECT4(a[255:128], b[193:192])
+tmp_dst[255:224] := SELECT4(a[255:128], b[225:224])
+tmp_dst[287:256] := SELECT4(a[383:256], b[257:256])
+tmp_dst[319:288] := SELECT4(a[383:256], b[289:288])
+tmp_dst[351:320] := SELECT4(a[383:256], b[321:320])
+tmp_dst[383:352] := SELECT4(a[383:256], b[353:352])
+tmp_dst[415:384] := SELECT4(a[511:384], b[385:384])
+tmp_dst[447:416] := SELECT4(a[511:384], b[417:416])
+tmp_dst[479:448] := SELECT4(a[511:384], b[449:448])
+tmp_dst[511:480] := SELECT4(a[511:384], b[481:480])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="zmm {z}, zmm, zmm" xed="VPERMILPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permute_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+dst[351:320] := SELECT4(a[383:256], imm8[5:4])
+dst[383:352] := SELECT4(a[383:256], imm8[7:6])
+dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+dst[479:448] := SELECT4(a[511:384], imm8[5:4])
+dst[511:480] := SELECT4(a[511:384], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="zmm, zmm, imm8" xed="VPERMILPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutevar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], b[1:0])
+dst[63:32] := SELECT4(a[127:0], b[33:32])
+dst[95:64] := SELECT4(a[127:0], b[65:64])
+dst[127:96] := SELECT4(a[127:0], b[97:96])
+dst[159:128] := SELECT4(a[255:128], b[129:128])
+dst[191:160] := SELECT4(a[255:128], b[161:160])
+dst[223:192] := SELECT4(a[255:128], b[193:192])
+dst[255:224] := SELECT4(a[255:128], b[225:224])
+dst[287:256] := SELECT4(a[383:256], b[257:256])
+dst[319:288] := SELECT4(a[383:256], b[289:288])
+dst[351:320] := SELECT4(a[383:256], b[321:320])
+dst[383:352] := SELECT4(a[383:256], b[353:352])
+dst[415:384] := SELECT4(a[511:384], b[385:384])
+dst[447:416] := SELECT4(a[511:384], b[417:416])
+dst[479:448] := SELECT4(a[511:384], b[449:448])
+dst[511:480] := SELECT4(a[511:384], b[481:480])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMILPS" form="zmm, zmm, zmm" xed="VPERMILPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0])
+tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2])
+tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4])
+tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPD" form="zmm {k}, zmm, imm8" xed="VPERMPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutexvar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	id := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPD" form="zmm {k}, zmm, zmm" xed="VPERMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0])
+tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2])
+tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4])
+tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPD" form="zmm {z}, zmm, imm8" xed="VPERMPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutexvar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	id := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPD" form="zmm {z}, zmm, zmm" xed="VPERMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+dst[319:256] := SELECT4(a[511:256], imm8[1:0])
+dst[383:320] := SELECT4(a[511:256], imm8[3:2])
+dst[447:384] := SELECT4(a[511:256], imm8[5:4])
+dst[511:448] := SELECT4(a[511:256], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPD" form="zmm, zmm, imm8" xed="VPERMPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutexvar_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	id := idx[i+2:i]*64
+	dst[i+63:i] := a[id+63:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPD" form="zmm, zmm, zmm" xed="VPERMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutexvar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPS" form="zmm {k}, zmm, zmm" xed="VPERMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutexvar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPS" form="zmm {z}, zmm, zmm" xed="VPERMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutexvar_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMPS" form="zmm, zmm, zmm" xed="VPERMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0])
+tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2])
+tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4])
+tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMQ" form="zmm {k}, zmm, imm8" xed="VPERMQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutexvar_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	id := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMQ" form="zmm {k}, zmm, zmm" xed="VPERMQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0])
+tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2])
+tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4])
+tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMQ" form="zmm {z}, zmm, imm8" xed="VPERMQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutexvar_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	id := idx[i+2:i]*64
+	IF k[j]
+		dst[i+63:i] := a[id+63:id]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMQ" form="zmm {z}, zmm, zmm" xed="VPERMQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 64-bit integers in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[63:0] := src[63:0]
+	1:	tmp[63:0] := src[127:64]
+	2:	tmp[63:0] := src[191:128]
+	3:	tmp[63:0] := src[255:192]
+	ESAC
+	RETURN tmp[63:0]
+}
+dst[63:0] := SELECT4(a[255:0], imm8[1:0])
+dst[127:64] := SELECT4(a[255:0], imm8[3:2])
+dst[191:128] := SELECT4(a[255:0], imm8[5:4])
+dst[255:192] := SELECT4(a[255:0], imm8[7:6])
+dst[319:256] := SELECT4(a[511:256], imm8[1:0])
+dst[383:320] := SELECT4(a[511:256], imm8[3:2])
+dst[447:384] := SELECT4(a[511:256], imm8[5:4])
+dst[511:448] := SELECT4(a[511:256], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMQ" form="zmm, zmm, imm8" xed="VPERMQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutexvar_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="idx" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Shuffle 64-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	id := idx[i+2:i]*64
+	dst[i+63:i] := a[id+63:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMQ" form="zmm, zmm, zmm" xed="VPERMQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expand_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="zmm {k}, zmm" xed="VPEXPANDD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expandloadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="zmm {k}, m32" xed="VPEXPANDD_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expand_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Load contiguous active 32-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[m+31:m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="zmm {z}, zmm" xed="VPEXPANDD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expandloadu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load contiguous active 32-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+m+31:mem_addr+m]
+		m := m + 32
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDD" form="zmm {z}, m32" xed="VPEXPANDD_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expand_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="zmm {k}, zmm" xed="VPEXPANDQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expandloadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="zmm {k}, m64" xed="VPEXPANDQ_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expand_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Load contiguous active 64-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[m+63:m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="zmm {z}, zmm" xed="VPEXPANDQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expandloadu_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load contiguous active 64-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+m+63:mem_addr+m]
+		m := m + 64
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDQ" form="zmm {z}, m64" xed="VPEXPANDQ_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="zmm, vm32y" xed="VPGATHERDQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i32gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="zmm {k}, vm32y" xed="VPGATHERDQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="ymm, vm64z" xed="VPGATHERQD_YMMu32_MASKmskw_MEMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPGATHERQD" form="ymm {k}, vm64z" xed="VPGATHERQD_YMMu32_MASKmskw_MEMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="zmm, vm64z" xed="VPGATHERQQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64gather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERQQ" form="zmm {k}, vm64z" xed="VPGATHERQQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0 
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="zmm {z}, zmm, zmm" xed="VPMAXSD_ZMMi32_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="zmm {k}, zmm, zmm" xed="VPMAXSQ_ZMMi64_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="zmm {z}, zmm, zmm" xed="VPMAXSQ_ZMMi64_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSQ" form="zmm, zmm, zmm" xed="VPMAXSQ_ZMMi64_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="zmm {z}, zmm, zmm" xed="VPMAXUD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="zmm {k}, zmm, zmm" xed="VPMAXUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="zmm {z}, zmm, zmm" xed="VPMAXUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUQ" form="zmm, zmm, zmm" xed="VPMAXUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSD" form="zmm {z}, zmm, zmm" xed="VPMINSD_ZMMi32_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="zmm {k}, zmm, zmm" xed="VPMINSQ_ZMMi64_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="zmm {z}, zmm, zmm" xed="VPMINSQ_ZMMi64_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSQ" form="zmm, zmm, zmm" xed="VPMINSQ_ZMMi64_MASKmskw_ZMMi64_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUD" form="zmm {z}, zmm, zmm" xed="VPMINUD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="zmm {k}, zmm, zmm" xed="VPMINUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="zmm {z}, zmm, zmm" xed="VPMINUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compare packed unsigned 64-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUQ" form="zmm, zmm, zmm" xed="VPMINUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := Truncate8(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm, zmm" xed="VPMOVDB_XMMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm {k}, zmm" xed="VPMOVDB_XMMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVDB" form="m128 {k}, zmm" xed="VPMOVDB_MEMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVDB" form="xmm {z}, zmm" xed="VPMOVDB_XMMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := Truncate16(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="ymm, zmm" xed="VPMOVDW_YMMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="ymm {k}, zmm" xed="VPMOVDW_YMMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="256"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVDW" form="m256 {k}, zmm" xed="VPMOVDW_MEMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed 32-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVDW" form="ymm {z}, zmm" xed="VPMOVDW_YMMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := Truncate8(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm, zmm" xed="VPMOVQB_XMMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm {k}, zmm" xed="VPMOVQB_XMMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQB" form="m64 {k}, zmm" xed="VPMOVQB_MEMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 8-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Truncate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVQB" form="xmm {z}, zmm" xed="VPMOVQB_XMMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := Truncate32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="ymm, zmm" xed="VPMOVQD_YMMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Truncate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="ymm {k}, zmm" xed="VPMOVQD_YMMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQD" form="m256 {k}, zmm" xed="VPMOVQD_MEMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 32-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Truncate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVQD" form="ymm {z}, zmm" xed="VPMOVQD_YMMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := Truncate16(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm, zmm" xed="VPMOVQW_XMMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm {k}, zmm" xed="VPMOVQW_XMMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVQW" form="m128 {k}, zmm" xed="VPMOVQW_MEMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed 64-bit integers in "a" to packed 16-bit integers with truncation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Truncate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVQW" form="xmm {z}, zmm" xed="VPMOVQW_XMMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := Saturate8(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm, zmm" xed="VPMOVSDB_XMMi8_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="SI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm {k}, zmm" xed="VPMOVSDB_XMMi8_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSDB" form="m128 {k}, zmm" xed="VPMOVSDB_MEMi8_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtsepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSDB" form="xmm {z}, zmm" xed="VPMOVSDB_XMMi8_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := Saturate16(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="ymm, zmm" xed="VPMOVSDW_YMMi16_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__m256i" varname="src" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="ymm {k}, zmm" xed="VPMOVSDW_YMMi16_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI16" memwidth="256"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSDW" form="m256 {k}, zmm" xed="VPMOVSDW_MEMi16_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtsepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSDW" form="ymm {z}, zmm" xed="VPMOVSDW_YMMi16_MASKmskw_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := Saturate8(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm, zmm" xed="VPMOVSQB_XMMi8_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="src" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm {k}, zmm" xed="VPMOVSQB_XMMi8_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQB" form="m64 {k}, zmm" xed="VPMOVSQB_MEMi8_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtsepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 8-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := Saturate8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVSQB" form="xmm {z}, zmm" xed="VPMOVSQB_XMMi8_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := Saturate32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="ymm, zmm" xed="VPMOVSQD_YMMi32_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Saturate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="ymm {k}, zmm" xed="VPMOVSQD_YMMi32_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQD" form="m256 {k}, zmm" xed="VPMOVSQD_MEMi32_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtsepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 32-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := Saturate32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVSQD" form="ymm {z}, zmm" xed="VPMOVSQD_YMMi32_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := Saturate16(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm, zmm" xed="VPMOVSQW_XMMi16_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="src" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm {k}, zmm" xed="VPMOVSQW_XMMi16_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtsepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="SI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVSQW" form="m128 {k}, zmm" xed="VPMOVSQW_MEMi16_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtsepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Convert packed signed 64-bit integers in "a" to packed 16-bit integers with signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := Saturate16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVSQW" form="xmm {z}, zmm" xed="VPMOVSQW_XMMi16_MASKmskw_ZMMi64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 8*j
+	dst[i+31:i] := SignExtend32(a[k+7:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="zmm, xmm" xed="VPMOVSXBD_ZMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="zmm {k}, xmm" xed="VPMOVSXBD_ZMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBD" form="zmm {z}, xmm" xed="VPMOVSXBD_ZMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := SignExtend64(a[k+7:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="zmm, xmm" xed="VPMOVSXBQ_ZMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="zmm {k}, xmm" xed="VPMOVSXBQ_ZMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXBQ" form="zmm {z}, xmm" xed="VPMOVSXBQ_ZMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := SignExtend64(a[k+31:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="zmm, ymm" xed="VPMOVSXDQ_ZMMi64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="zmm {k}, ymm" xed="VPMOVSXDQ_ZMMi64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXDQ" form="zmm {z}, ymm" xed="VPMOVSXDQ_ZMMi64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 16*j
+	dst[i+31:i] := SignExtend32(a[k+15:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="zmm, ymm" xed="VPMOVSXWD_ZMMi32_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	l := j*16
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="zmm {k}, ymm" xed="VPMOVSXWD_ZMMi32_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := SignExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXWD" form="zmm {z}, ymm" xed="VPMOVSXWD_ZMMi32_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 16*j
+	dst[i+63:i] := SignExtend64(a[k+15:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="zmm, xmm" xed="VPMOVSXWQ_ZMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="zmm {k}, xmm" xed="VPMOVSXWQ_ZMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVSXWQ" form="zmm {z}, xmm" xed="VPMOVSXWQ_ZMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 8*j
+	dst[k+7:k] := SaturateU8(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm, zmm" xed="VPMOVUSDB_XMMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm {k}, zmm" xed="VPMOVUSDB_XMMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi32_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSDB" form="m128 {k}, zmm" xed="VPMOVUSDB_MEMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtusepi32_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+31:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSDB" form="xmm {z}, zmm" xed="VPMOVUSDB_XMMu8_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 16*j
+	dst[k+15:k] := SaturateU16(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="ymm, zmm" xed="VPMOVUSDW_YMMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="ymm {k}, zmm" xed="VPMOVUSDW_YMMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi32_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="256"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSDW" form="m256 {k}, zmm" xed="VPMOVUSDW_MEMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtusepi32_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Convert packed unsigned 32-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+31:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSDW" form="ymm {z}, zmm" xed="VPMOVUSDW_YMMu16_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 8*j
+	dst[k+7:k] := SaturateU8(a[i+63:i])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm, zmm" xed="VPMOVUSQB_XMMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := src[l+7:l]
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm {k}, zmm" xed="VPMOVUSQB_XMMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi64_storeu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQB" form="m64 {k}, zmm" xed="VPMOVUSQB_MEMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtusepi64_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 8-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[l+7:l] := SaturateU8(a[i+63:i])
+	ELSE
+		dst[l+7:l] := 0
+	FI
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPMOVUSQB" form="xmm {z}, zmm" xed="VPMOVUSQB_XMMu8_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[k+31:k] := SaturateU32(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="ymm, zmm" xed="VPMOVUSQD_YMMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := SaturateU32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="ymm {k}, zmm" xed="VPMOVUSQD_YMMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi64_storeu_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32" memwidth="256"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQD" form="m256 {k}, zmm" xed="VPMOVUSQD_MEMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtusepi64_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 32-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[l+31:l] := SaturateU32(a[i+63:i])
+	ELSE
+		dst[l+31:l] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMOVUSQD" form="ymm {z}, zmm" xed="VPMOVUSQD_YMMu32_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 16*j
+	dst[k+15:k] := SaturateU16(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm, zmm" xed="VPMOVUSQW_XMMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := src[l+15:l]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm {k}, zmm" xed="VPMOVUSQW_XMMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtusepi64_storeu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPMOVUSQW" form="m128 {k}, zmm" xed="VPMOVUSQW_MEMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtusepi64_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Convert packed unsigned 64-bit integers in "a" to packed unsigned 16-bit integers with unsigned saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[l+15:l] := SaturateU16(a[i+63:i])
+	ELSE
+		dst[l+15:l] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMOVUSQW" form="xmm {z}, zmm" xed="VPMOVUSQW_XMMu16_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 8*j
+	dst[i+31:i] := ZeroExtend32(a[k+7:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="zmm, xmm" xed="VPMOVZXBD_ZMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="zmm {k}, xmm" xed="VPMOVZXBD_ZMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 8*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+7:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBD" form="zmm {z}, xmm" xed="VPMOVZXBD_ZMMi32_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := ZeroExtend64(a[k+7:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="zmm, xmm" xed="VPMOVZXBQ_ZMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="zmm {k}, xmm" xed="VPMOVZXBQ_ZMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 8 bytes of "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 8*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+7:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXBQ" form="zmm {z}, xmm" xed="VPMOVZXBQ_ZMMi64_MASKmskw_XMMi8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := ZeroExtend64(a[k+31:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="zmm, ymm" xed="VPMOVZXDQ_ZMMi64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+31:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="zmm {k}, ymm" xed="VPMOVZXDQ_ZMMi64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 32*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+31:l])
+	ELSE 
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXDQ" form="zmm {z}, ymm" xed="VPMOVZXDQ_ZMMi64_MASKmskw_YMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	k := 16*j
+	dst[i+31:i] := ZeroExtend32(a[k+15:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="zmm, ymm" xed="VPMOVZXWD_ZMMi32_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="zmm {k}, ymm" xed="VPMOVZXWD_ZMMi32_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	l := 16*j
+	IF k[j]
+		dst[i+31:i] := ZeroExtend32(a[l+15:l])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXWD" form="zmm {z}, ymm" xed="VPMOVZXWD_ZMMi32_MASKmskw_YMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	k := 16*j
+	dst[i+63:i] := ZeroExtend64(a[k+15:k])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="zmm, xmm" xed="VPMOVZXWQ_ZMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="zmm {k}, xmm" xed="VPMOVZXWQ_ZMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	l := 16*j
+	IF k[j]
+		dst[i+63:i] := ZeroExtend64(a[l+15:l])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMOVZXWQ" form="zmm {z}, xmm" xed="VPMOVZXWQ_ZMMi64_MASKmskw_XMMi16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="src" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="zmm {k}, zmm, zmm" xed="VPMULDQ_ZMMi64_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="zmm {z}, zmm, zmm" xed="VPMULDQ_ZMMi64_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULDQ" form="zmm, zmm, zmm" xed="VPMULDQ_ZMMi64_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="zmm {k}, zmm, zmm" xed="VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="zmm {z}, zmm, zmm" xed="VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mul_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULUDQ" form="zmm, zmm, zmm" xed="VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORD" form="zmm {z}, zmm, zmm" xed="VPORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORQ" form="zmm {z}, zmm, zmm" xed="VPORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLD" form="zmm {k}, zmm, imm8" xed="VPROLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLD" form="zmm {z}, zmm, imm8" xed="VPROLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rol_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLD" form="zmm, zmm, imm8" xed="VPROLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLQ" form="zmm {k}, zmm, imm8" xed="VPROLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLQ" form="zmm {z}, zmm, imm8" xed="VPROLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rol_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLQ" form="zmm, zmm, imm8" xed="VPROLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLVD" form="zmm {k}, zmm, zmm" xed="VPROLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLVD" form="zmm {z}, zmm, zmm" xed="VPROLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rolv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LEFT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLVD" form="zmm, zmm, zmm" xed="VPROLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="zmm {k}, zmm, zmm" xed="VPROLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="zmm {z}, zmm, zmm" xed="VPROLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rolv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the left by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE LEFT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &lt;&lt; count) OR (src &gt;&gt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := LEFT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPROLVQ" form="zmm, zmm, zmm" xed="VPROLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORD" form="zmm {k}, zmm, imm8" xed="VPRORD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORD" form="zmm {z}, zmm, imm8" xed="VPRORD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_ror_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORD" form="zmm, zmm, imm8" xed="VPRORD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORQ" form="zmm {k}, zmm, imm8" xed="VPRORQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORQ" form="zmm {z}, zmm, imm8" xed="VPRORQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_ror_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORQ" form="zmm, zmm, imm8" xed="VPRORQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORVD" form="zmm {k}, zmm, zmm" xed="VPRORVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORVD" form="zmm {z}, zmm, zmm" xed="VPRORVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rorv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Rotate the bits in each packed 32-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_DWORDS(src, count_src) {
+	count := count_src % 32
+	RETURN (src &gt;&gt;count) OR (src &lt;&lt; (32 - count))
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RIGHT_ROTATE_DWORDS(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORVD" form="zmm, zmm, zmm" xed="VPRORVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="zmm {k}, zmm, zmm" xed="VPRORVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="zmm {z}, zmm, zmm" xed="VPRORVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rorv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Rotate the bits in each packed 64-bit integer in "a" to the right by the number of bits specified in the corresponding element of "b", and store the results in "dst".</description>
+	<operation>
+DEFINE RIGHT_ROTATE_QWORDS(src, count_src) {
+	count := count_src % 64
+	RETURN (src &gt;&gt; count) OR (src &lt;&lt; (64 - count))
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RIGHT_ROTATE_QWORDS(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPRORVQ" form="zmm, zmm, zmm" xed="VPRORVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i32scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="vm32y, zmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i32scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="vm32y {k}, zmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQD" form="vm64z, ymm" xed="VPSCATTERQD_MEMu32_MASKmskw_YMMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQD" form="vm64z {k}, ymm" xed="VPSCATTERQD_MEMu32_MASKmskw_YMMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQQ" form="vm64z, zmm" xed="VPSCATTERQQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64scatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 64-bit integers from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERQQ" form="vm64z {k}, zmm" xed="VPSCATTERQQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4])
+tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6])
+tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4])
+tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="zmm {z}, zmm, imm8" xed="VPSHUFD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLD" form="zmm {k}, zmm, xmm" xed="VPSLLD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLD" form="zmm {z}, zmm, xmm" xed="VPSLLD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLD" form="zmm {z}, zmm, imm8" xed="VPSLLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sll_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLD" form="zmm, zmm, xmm" xed="VPSLLD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="zmm {k}, zmm, xmm" xed="VPSLLQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="zmm {k}, zmm, imm8" xed="VPSLLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="zmm {z}, zmm, xmm" xed="VPSLLQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="zmm {z}, zmm, imm8" xed="VPSLLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sll_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="zmm, zmm, xmm" xed="VPSLLQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_slli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLQ" form="zmm, zmm, imm8" xed="VPSLLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="zmm {z}, zmm, zmm" xed="VPSLLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="zmm {k}, zmm, zmm" xed="VPSLLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="zmm {z}, zmm, zmm" xed="VPSLLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sllv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVQ" form="zmm, zmm, zmm" xed="VPSLLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAD" form="zmm {k}, zmm, xmm" xed="VPSRAD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAD" form="zmm {z}, zmm, xmm" xed="VPSRAD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAD" form="zmm {z}, zmm, imm8" xed="VPSRAD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sra_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAD" form="zmm, zmm, xmm" xed="VPSRAD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="zmm {k}, zmm, xmm" xed="VPSRAQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="zmm {k}, zmm, imm8" xed="VPSRAQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="zmm {z}, zmm, xmm" xed="VPSRAQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+		ELSE
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="zmm {z}, zmm, imm8" xed="VPSRAQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sra_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+	ELSE
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="zmm, zmm, xmm" xed="VPSRAQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srai_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0x0)
+	ELSE
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAQ" form="zmm, zmm, imm8" xed="VPSRAQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="zmm {z}, zmm, zmm" xed="VPSRAVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="zmm {k}, zmm, zmm" xed="VPSRAVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="zmm {z}, zmm, zmm" xed="VPSRAVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srav_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := SignExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := (a[i+63] ? 0xFFFFFFFFFFFFFFFF : 0)
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVQ" form="zmm, zmm, zmm" xed="VPSRAVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLD" form="zmm {k}, zmm, xmm" xed="VPSRLD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[63:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLD" form="zmm {z}, zmm, xmm" xed="VPSRLD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLD" form="zmm {z}, zmm, imm8" xed="VPSRLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srl_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLD" form="zmm, zmm, xmm" xed="VPSRLD_ZMMu32_MASKmskw_ZMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="zmm {k}, zmm, xmm" xed="VPSRLQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="zmm {k}, zmm, imm8" xed="VPSRLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[63:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="zmm {z}, zmm, xmm" xed="VPSRLQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF imm8[7:0] &gt; 63
+			dst[i+63:i] := 0
+		ELSE
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="zmm {z}, zmm, imm8" xed="VPSRLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srl_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="zmm, zmm, xmm" xed="VPSRLQ_ZMMu64_MASKmskw_ZMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srli_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLQ" form="zmm, zmm, imm8" xed="VPSRLQ_ZMMu64_MASKmskw_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="zmm {z}, zmm, zmm" xed="VPSRLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="zmm {k}, zmm, zmm" xed="VPSRLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		IF count[i+63:i] &lt; 64
+			dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+		ELSE
+			dst[i+63:i] := 0
+		FI
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="zmm {z}, zmm, zmm" xed="VPSRLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_srlv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF count[i+63:i] &lt; 64
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVQ" form="zmm, zmm, zmm" xed="VPSRLVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBD" form="zmm {z}, zmm, zmm" xed="VPSUBD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="zmm {k}, zmm, zmm" xed="VPSUBQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="zmm {z}, zmm, zmm" xed="VPSUBQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sub_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBQ" form="zmm, zmm, zmm" xed="VPSUBQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in "imm8". For each bit in each packed 32-bit integer, the corresponding bits from "src", "a", and "b" are used to form a 3-bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using writemask "k" at 32-bit granularity (32-bit elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		FOR h := 0 to 31
+			index[2:0] := (src[i+h] &lt;&lt; 2) OR (a[i+h] &lt;&lt; 1) OR b[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="zmm {k}, zmm, zmm, imm8" xed="VPTERNLOGD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in "imm8". For each bit in each packed 32-bit integer, the corresponding bits from "a", "b", and "c" are used to form a 3-bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using zeromask "k" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		FOR h := 0 to 31
+			index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="zmm {z}, zmm, zmm, imm8" xed="VPTERNLOGD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_ternarylogic_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in "imm8". For each bit in each packed 32-bit integer, the corresponding bits from "a", "b", and "c" are used to form a 3-bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	FOR h := 0 to 31
+		index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+		dst[i+h] := imm8[index[2:0]]
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPTERNLOGD" form="zmm, zmm, zmm, imm8" xed="VPTERNLOGD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in "imm8". For each bit in each packed 64-bit integer, the corresponding bits from "src", "a", and "b" are used to form a 3-bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using writemask "k" at 64-bit granularity (64-bit elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		FOR h := 0 to 63
+			index[2:0] := (src[i+h] &lt;&lt; 2) OR (a[i+h] &lt;&lt; 1) OR b[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="zmm {k}, zmm, zmm, imm8" xed="VPTERNLOGQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in "imm8". For each bit in each packed 64-bit integer, the corresponding bits from "a", "b", and "c" are used to form a 3-bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst" using zeromask "k" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		FOR h := 0 to 63
+			index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+			dst[i+h] := imm8[index[2:0]]
+		ENDFOR
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="zmm {z}, zmm, zmm, imm8" xed="VPTERNLOGQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_ternarylogic_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in "imm8". For each bit in each packed 64-bit integer, the corresponding bits from "a", "b", and "c" are used to form a 3-bit index into "imm8", and the value at that bit in "imm8" is written to the corresponding bit in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	FOR h := 0 to 63
+		index[2:0] := (a[i+h] &lt;&lt; 2) OR (b[i+h] &lt;&lt; 1) OR c[i+h]
+		dst[i+h] := imm8[index[2:0]]
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPTERNLOGQ" form="zmm, zmm, zmm, imm8" xed="VPTERNLOGQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_test_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTMQ" form="k {k}, zmm, zmm" xed="VPTESTMQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_test_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ((a[i+63:i] AND b[i+63:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTMQ" form="k, zmm, zmm" xed="VPTESTMQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_testn_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTNMD" form="k {k}, zmm, zmm" xed="VPTESTNMD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_testn_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ((a[i+31:i] AND b[i+31:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTNMD" form="k, zmm, zmm" xed="VPTESTNMD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_testn_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTNMQ" form="k {k}, zmm, zmm" xed="VPTESTNMQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_testn_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", producing intermediate 64-bit values, and set the corresponding bit in result mask "k" if the intermediate value is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := ((a[i+63:i] AND b[i+63:i]) == 0) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VPTESTNMQ" form="k, zmm, zmm" xed="VPTESTNMQ_MASKmskw_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="zmm {k}, zmm, zmm" xed="VPUNPCKHDQ_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="zmm {z}, zmm, zmm" xed="VPUNPCKHDQ_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHDQ" form="zmm, zmm, zmm" xed="VPUNPCKHDQ_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="zmm {k}, zmm, zmm" xed="VPUNPCKHQDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="zmm {z}, zmm, zmm" xed="VPUNPCKHQDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKHQDQ" form="zmm, zmm, zmm" xed="VPUNPCKHQDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="zmm {k}, zmm, zmm" xed="VPUNPCKLDQ_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="zmm {z}, zmm, zmm" xed="VPUNPCKLDQ_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLDQ" form="zmm, zmm, zmm" xed="VPUNPCKLDQ_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="zmm {k}, zmm, zmm" xed="VPUNPCKLQDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="zmm {z}, zmm, zmm" xed="VPUNPCKLQDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPUNPCKLQDQ" form="zmm, zmm, zmm" xed="VPUNPCKLQDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORD" form="zmm {z}, zmm, zmm" xed="VPXORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm {z}, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="zmm {k}, zmm" xed="VRCP14PD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="zmm {z}, zmm" xed="VRCP14PD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rcp14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP14PD" form="zmm, zmm" xed="VRCP14PD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="zmm {k}, zmm" xed="VRCP14PS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="zmm {z}, zmm" xed="VRCP14PS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rcp14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP14PS" form="zmm, zmm" xed="VRCP14PS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp14_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14SD" form="xmm {k}, xmm, xmm" xed="VRCP14SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp14_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14SD" form="xmm {z}, xmm, xmm" xed="VRCP14SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp14_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+dst[63:0] := (1.0 / b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14SD" form="xmm, xmm, xmm" xed="VRCP14SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rcp14_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14SS" form="xmm {k}, xmm, xmm" xed="VRCP14SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rcp14_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14SS" form="xmm {z}, xmm, xmm" xed="VRCP14SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rcp14_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+dst[31:0] := (1.0 / b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRCP14SS" form="xmm, xmm, xmm" xed="VRCP14SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="zmm {k}, zmm, imm8" xed="VRNDSCALEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_roundscale_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="zmm {k}, zmm, imm8 {sae}" xed="VRNDSCALEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="zmm {z}, zmm, imm8" xed="VRNDSCALEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_roundscale_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="zmm {z}, zmm, imm8 {sae}" xed="VRNDSCALEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_roundscale_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="zmm, zmm, imm8" xed="VRNDSCALEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_roundscale_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round packed double-precision (64-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RoundScaleFP64(a[i+63:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPD" form="zmm, zmm, imm8 {sae}" xed="VRNDSCALEPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="zmm {k}, zmm, imm8" xed="VRNDSCALEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_roundscale_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="zmm {k}, zmm, imm8 {sae}" xed="VRNDSCALEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="zmm {z}, zmm, imm8" xed="VRNDSCALEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_roundscale_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="zmm {z}, zmm, imm8 {sae}" xed="VRNDSCALEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_roundscale_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="zmm, zmm, imm8" xed="VRNDSCALEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_roundscale_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round packed single-precision (32-bit) floating-point elements in "a" to the number of fraction bits specified by "imm8", and store the results in "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RoundScaleFP32(a[i+31:i], imm8[7:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDSCALEPS" form="zmm, zmm, imm8 {sae}" xed="VRNDSCALEPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_roundscale_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESD" form="xmm {k}, xmm, xmm, imm8 {sae}" xed="VRNDSCALESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_roundscale_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESD" form="xmm {k}, xmm, xmm, imm8" xed="VRNDSCALESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_roundscale_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESD" form="xmm {z}, xmm, xmm, imm8 {sae}" xed="VRNDSCALESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_roundscale_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+IF k[0]
+	dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESD" form="xmm {z}, xmm, xmm, imm8" xed="VRNDSCALESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_roundscale_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESD" form="xmm, xmm, xmm, imm8 {sae}" xed="VRNDSCALESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_roundscale_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP64(src1[63:0], imm8[7:0]) {
+	m[63:0] := FP64(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[63:0] := POW(2.0, -m) * ROUND(POW(2.0, m) * src1[63:0], imm8[3:0])
+	IF IsInf(tmp[63:0])
+		tmp[63:0] := src1[63:0]
+	FI
+	RETURN tmp[63:0]
+}
+dst[63:0] := RoundScaleFP64(b[63:0], imm8[7:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESD" form="xmm, xmm, xmm, imm8" xed="VRNDSCALESD_XMMf64_MASKmskw_XMMf64_XMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_roundscale_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESS" form="xmm {k}, xmm, xmm, imm8 {sae}" xed="VRNDSCALESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_roundscale_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESS" form="xmm {k}, xmm, xmm, imm8" xed="VRNDSCALESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_roundscale_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESS" form="xmm {z}, xmm, xmm, imm8 {sae}" xed="VRNDSCALESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_roundscale_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+IF k[0]
+	dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESS" form="xmm {z}, xmm, xmm, imm8" xed="VRNDSCALESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_roundscale_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note][sae_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESS" form="xmm, xmm, xmm, imm8 {sae}" xed="VRNDSCALESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_roundscale_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_MM_REDUCE"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" to the number of fraction bits specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". [round_imm_note]</description>
+	<operation>
+DEFINE RoundScaleFP32(src1[31:0], imm8[7:0]) {
+	m[31:0] := FP32(imm8[7:4]) // number of fraction bits after the binary point to be preserved
+	tmp[31:0] := POW(FP32(2.0), -m) * ROUND(POW(FP32(2.0), m) * src1[31:0], imm8[3:0])
+	IF IsInf(tmp[31:0])
+		tmp[31:0] := src1[31:0]
+	FI
+	RETURN tmp[31:0]
+}
+dst[31:0] := RoundScaleFP32(b[31:0], imm8[7:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRNDSCALESS" form="xmm, xmm, xmm, imm8" xed="VRNDSCALESS_XMMf32_MASKmskw_XMMf32_XMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="zmm {k}, zmm" xed="VRSQRT14PD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="zmm {z}, zmm" xed="VRSQRT14PD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rsqrt14_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / SQRT(a[i+63:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT14PD" form="zmm, zmm" xed="VRSQRT14PD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="zmm {k}, zmm" xed="VRSQRT14PS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="zmm {z}, zmm" xed="VRSQRT14PS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_rsqrt14_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT14PS" form="zmm, zmm" xed="VRSQRT14PS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt14_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / SQRT(b[63:0]))
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14SD" form="xmm {k}, xmm, xmm" xed="VRSQRT14SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt14_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[63:0] := (1.0 / SQRT(b[63:0]))
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14SD" form="xmm {z}, xmm, xmm" xed="VRSQRT14SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rsqrt14_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+dst[63:0] := (1.0 / SQRT(b[63:0]))
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14SD" form="xmm, xmm, xmm" xed="VRSQRT14SD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_rsqrt14_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / SQRT(b[31:0]))
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14SS" form="xmm {k}, xmm, xmm" xed="VRSQRT14SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_rsqrt14_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+IF k[0]
+	dst[31:0] := (1.0 / SQRT(b[31:0]))
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14SS" form="xmm {z}, xmm, xmm" xed="VRSQRT14SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_rsqrt14_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 2^-14.</description>
+	<operation>
+dst[31:0] := (1.0 / SQRT(b[31:0]))
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VRSQRT14SS" form="xmm, xmm, xmm" xed="VRSQRT14SS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="zmm {k}, zmm, zmm" xed="VSCALEFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_scalef_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="zmm {k}, zmm, zmm {er}" xed="VSCALEFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="zmm {z}, zmm, zmm" xed="VSCALEFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_scalef_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="zmm {z}, zmm, zmm {er}" xed="VSCALEFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_scalef_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="zmm, zmm, zmm" xed="VSCALEFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_scalef_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the packed double-precision (64-bit) floating-point elements in "a" using values from "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SCALE(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPD" form="zmm, zmm, zmm {er}" xed="VSCALEFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="zmm {k}, zmm, zmm" xed="VSCALEFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_scalef_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="zmm {k}, zmm, zmm {er}" xed="VSCALEFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="zmm {z}, zmm, zmm" xed="VSCALEFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_scalef_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="zmm {z}, zmm, zmm {er}" xed="VSCALEFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_scalef_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="zmm, zmm, zmm" xed="VSCALEFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_scalef_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the packed single-precision (32-bit) floating-point elements in "a" using values from "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SCALE(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEFPS" form="zmm, zmm, zmm {er}" xed="VSCALEFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_scalef_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the lower double-precision (64-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+IF k[0]
+	dst[63:0] := SCALE(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSD" form="xmm {k}, xmm, xmm {er}" xed="VSCALEFSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_scalef_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Scale the lower double-precision (64-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+IF k[0]
+	dst[63:0] := SCALE(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSD" form="xmm {k}, xmm, xmm" xed="VSCALEFSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_scalef_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the lower double-precision (64-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+IF k[0]
+	dst[63:0] := SCALE(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSD" form="xmm {z}, xmm, xmm {er}" xed="VSCALEFSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_scalef_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Scale the lower double-precision (64-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+IF k[0]
+	dst[63:0] := SCALE(a[63:0], b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSD" form="xmm {z}, xmm, xmm" xed="VSCALEFSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_scalef_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the lower double-precision (64-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+dst[63:0] := SCALE(a[63:0], b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSD" form="xmm, xmm, xmm {er}" xed="VSCALEFSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_scalef_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Scale the lower double-precision (64-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[63:0] := tmp_src1[63:0] * POW(2.0, FLOOR(tmp_src2[63:0]))
+	RETURN dst[63:0]
+}
+dst[63:0] := SCALE(a[63:0], b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSD" form="xmm, xmm, xmm" xed="VSCALEFSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_scalef_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the lower single-precision (32-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+IF k[0]
+	dst[31:0] := SCALE(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSS" form="xmm {k}, xmm, xmm {er}" xed="VSCALEFSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_scalef_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Scale the lower single-precision (32-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+IF k[0]
+	dst[31:0] := SCALE(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSS" form="xmm {k}, xmm, xmm" xed="VSCALEFSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_scalef_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the lower single-precision (32-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+IF k[0]
+	dst[31:0] := SCALE(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSS" form="xmm {z}, xmm, xmm {er}" xed="VSCALEFSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_scalef_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Scale the lower single-precision (32-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+IF k[0]
+	dst[31:0] := SCALE(a[31:0], b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSS" form="xmm {z}, xmm, xmm" xed="VSCALEFSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_scalef_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scale the lower single-precision (32-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+dst[31:0] := SCALE(a[31:0], b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSS" form="xmm, xmm, xmm {er}" xed="VSCALEFSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_scalef_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Scale the lower single-precision (32-bit) floating-point element in "a" using the lower element of "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>DEFINE SCALE(src1, src2) {
+	IF (src2 == NaN)
+		IF (src2 == SNaN)
+			RETURN QNAN(src2)
+		FI
+	ELSE IF (src1 == NaN)
+		IF (src1 == SNaN)
+			RETURN QNAN(src1)
+		FI
+		IF (src2 != INF)
+			RETURN QNAN(src1)
+		FI
+	ELSE
+		tmp_src2 := src2
+		tmp_src1 := src1
+		IF (IS_DENORMAL(src2) AND MXCSR.DAZ)
+			tmp_src2 := 0
+		FI
+		IF (IS_DENORMAL(src1) AND MXCSR.DAZ)
+			tmp_src1 := 0
+		FI
+	FI
+	dst[31:0] := tmp_src1[31:0] * POW(2.0, FLOOR(tmp_src2[31:0]))
+	RETURN dst[31:0]
+}
+dst[31:0] := SCALE(a[31:0], b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSCALEFSS" form="xmm, xmm, xmm" xed="VSCALEFSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="vm32y, zmm" xed="VSCATTERDPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="vm32y {k}, zmm" xed="VSCATTERDPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPD" form="vm32z, zmm" xed="VSCATTERQPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter double-precision (64-bit) floating-point elements from "a" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPD" form="vm32z {k}, zmm" xed="VSCATTERQPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPS" form="vm32z, ymm" xed="VSCATTERQPS_MEMf32_MASKmskw_YMMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERQPS" form="vm32z {k}, ymm" xed="VSCATTERQPS_MEMf32_MASKmskw_YMMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFF32X4" form="zmm {k}, zmm, zmm, imm8" xed="VSHUFF32X4_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFF32X4" form="zmm {z}, zmm, zmm, imm8" xed="VSHUFF32X4_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_f32x4">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFF32X4" form="zmm, zmm, zmm, imm8" xed="VSHUFF32X4_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFF64X2" form="zmm {k}, zmm, zmm, imm8" xed="VSHUFF64X2_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFF64X2" form="zmm {z}, zmm, zmm, imm8" xed="VSHUFF64X2_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_f64x2">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFF64X2" form="zmm, zmm, zmm, imm8" xed="VSHUFF64X2_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_i32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFI32X4" form="zmm {k}, zmm, zmm, imm8" xed="VSHUFI32X4_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_i32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFI32X4" form="zmm {z}, zmm, zmm, imm8" xed="VSHUFI32X4_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_i32x4">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 4 32-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFI32X4" form="zmm, zmm, zmm, imm8" xed="VSHUFI32X4_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_i64x2">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFI64X2" form="zmm {k}, zmm, zmm, imm8" xed="VSHUFI64X2_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_i64x2">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp_dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+tmp_dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+tmp_dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+tmp_dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFI64X2" form="zmm {z}, zmm, zmm, imm8" xed="VSHUFI64X2_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_i64x2">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 128-bits (composed of 2 64-bit integers) selected by "imm8" from "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[127:0] := src[127:0]
+	1:	tmp[127:0] := src[255:128]
+	2:	tmp[127:0] := src[383:256]
+	3:	tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+dst[127:0] := SELECT4(a[511:0], imm8[1:0])
+dst[255:128] := SELECT4(a[511:0], imm8[3:2])
+dst[383:256] := SELECT4(b[511:0], imm8[5:4])
+dst[511:384] := SELECT4(b[511:0], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFI64X2" form="zmm, zmm, zmm, imm8" xed="VSHUFI64X2_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
+tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
+tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320]
+tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320]
+tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448]
+tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="zmm {k}, zmm, zmm, imm8" xed="VSHUFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+tmp_dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+tmp_dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+tmp_dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
+tmp_dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
+tmp_dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320]
+tmp_dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320]
+tmp_dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448]
+tmp_dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="zmm {z}, zmm, zmm, imm8" xed="VSHUFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
+dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
+dst[319:256] := (imm8[4] == 0) ? a[319:256] : a[383:320]
+dst[383:320] := (imm8[5] == 0) ? b[319:256] : b[383:320]
+dst[447:384] := (imm8[6] == 0) ? a[447:384] : a[511:448]
+dst[511:448] := (imm8[7] == 0) ? b[447:384] : b[511:448]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFPD" form="zmm, zmm, zmm, imm8" xed="VSHUFPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements from "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])
+tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4])
+tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6])
+tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4])
+tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="zmm {k}, zmm, zmm, imm8" xed="VSHUFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements from "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(b[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(b[255:128], imm8[7:6])
+tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+tmp_dst[351:320] := SELECT4(b[383:256], imm8[5:4])
+tmp_dst[383:352] := SELECT4(b[383:256], imm8[7:6])
+tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+tmp_dst[479:448] := SELECT4(b[511:384], imm8[5:4])
+tmp_dst[511:480] := SELECT4(b[511:384], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="zmm {z}, zmm, zmm, imm8" xed="VSHUFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements from "a" and "b" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+dst[223:192] := SELECT4(b[255:128], imm8[5:4])
+dst[255:224] := SELECT4(b[255:128], imm8[7:6])
+dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+dst[351:320] := SELECT4(b[383:256], imm8[5:4])
+dst[383:352] := SELECT4(b[383:256], imm8[7:6])
+dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+dst[479:448] := SELECT4(b[511:384], imm8[5:4])
+dst[511:480] := SELECT4(b[511:384], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSHUFPS" form="zmm, zmm, zmm, imm8" xed="VSHUFPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="zmm {k}, zmm" xed="VSQRTPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sqrt_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="zmm {k}, zmm {er}" xed="VSQRTPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="zmm {z}, zmm" xed="VSQRTPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sqrt_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="zmm {z}, zmm {er}" xed="VSQRTPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="zmm, zmm" xed="VSQRTPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sqrt_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPD" form="zmm, zmm {er}" xed="VSQRTPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="zmm {k}, zmm" xed="VSQRTPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_sqrt_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="zmm {k}, zmm {er}" xed="VSQRTPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="zmm {z}, zmm" xed="VSQRTPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sqrt_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="zmm {z}, zmm {er}" xed="VSQRTPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="zmm, zmm" xed="VSQRTPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_sqrt_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSQRTPS" form="zmm, zmm {er}" xed="VSQRTPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sqrt_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := SQRT(b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSD" form="xmm {k}, xmm, xmm {er}" xed="VSQRTSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sqrt_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := SQRT(b[63:0])
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSD" form="xmm {k}, xmm, xmm" xed="VSQRTSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sqrt_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := SQRT(b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSD" form="xmm {z}, xmm, xmm {er}" xed="VSQRTSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sqrt_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := SQRT(b[63:0])
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSD" form="xmm {z}, xmm, xmm" xed="VSQRTSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_sqrt_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := SQRT(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSD" form="xmm, xmm, xmm {er}" xed="VSQRTSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sqrt_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := SQRT(b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSS" form="xmm {k}, xmm, xmm {er}" xed="VSQRTSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sqrt_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := SQRT(b[31:0])
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSS" form="xmm {k}, xmm, xmm" xed="VSQRTSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sqrt_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := SQRT(b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSS" form="xmm {z}, xmm, xmm {er}" xed="VSQRTSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sqrt_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := SQRT(b[31:0])
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSS" form="xmm {z}, xmm, xmm" xed="VSQRTSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_sqrt_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Compute the square root of the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := SQRT(b[31:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSQRTSS" form="xmm, xmm, xmm {er}" xed="VSQRTSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPD" form="zmm {z}, zmm, zmm" xed="VSUBPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPD" form="zmm {z}, zmm, zmm {er}" xed="VSUBPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPS" form="zmm {z}, zmm, zmm" xed="VSUBPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_sub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPS" form="zmm {z}, zmm, zmm {er}" xed="VSUBPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] - b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSD" form="xmm {k}, xmm, xmm {er}" xed="VSUBSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] - b[63:0]
+ELSE
+	dst[63:0] := src[63:0]
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSD" form="xmm {k}, xmm, xmm" xed="VSUBSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] - b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSD" form="xmm {z}, xmm, xmm {er}" xed="VSUBSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+IF k[0]
+	dst[63:0] := a[63:0] - b[63:0]
+ELSE
+	dst[63:0] := 0
+FI
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSD" form="xmm {z}, xmm, xmm" xed="VSUBSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_sub_round_sd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := a[63:0] - b[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSD" form="xmm, xmm, xmm {er}" xed="VSUBSD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] - b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSS" form="xmm {k}, xmm, xmm {er}" xed="VSUBSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_sub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] - b[31:0]
+ELSE
+	dst[31:0] := src[31:0]
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSS" form="xmm {k}, xmm, xmm" xed="VSUBSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] - b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSS" form="xmm {z}, xmm, xmm {er}" xed="VSUBSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_sub_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+IF k[0]
+	dst[31:0] := a[31:0] - b[31:0]
+ELSE
+	dst[31:0] := 0
+FI
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSS" form="xmm {z}, xmm, xmm" xed="VSUBSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_sub_round_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := a[31:0] - b[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VSUBSS" form="xmm, xmm, xmm {er}" xed="VSUBSS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="zmm {k}, zmm, zmm" xed="VUNPCKHPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="zmm {z}, zmm, zmm" xed="VUNPCKHPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_HIGH_QWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_HIGH_QWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKHPD" form="zmm, zmm, zmm" xed="VUNPCKHPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="zmm {k}, zmm, zmm" xed="VUNPCKHPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="zmm {z}, zmm, zmm" xed="VUNPCKHPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_HIGH_DWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_HIGH_DWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKHPS" form="zmm, zmm, zmm" xed="VUNPCKHPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="zmm {k}, zmm, zmm" xed="VUNPCKLPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+tmp_dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384])
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := tmp_dst[i+63:i]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="zmm {z}, zmm, zmm" xed="VUNPCKLPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_QWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_QWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKLPD" form="zmm, zmm, zmm" xed="VUNPCKLPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="zmm {k}, zmm, zmm" xed="VUNPCKLPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+tmp_dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+tmp_dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+tmp_dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256])
+tmp_dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="zmm {z}, zmm, zmm" xed="VUNPCKLPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
+dst[383:256] := INTERLEAVE_DWORDS(a[383:256], b[383:256])
+dst[511:384] := INTERLEAVE_DWORDS(a[511:384], b[511:384])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VUNPCKLPS" form="zmm, zmm, zmm" xed="VUNPCKLPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castpd128_pd512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castpd256_pd512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castpd512_pd128">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m512d to type __m128d. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castps512_ps128">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m512 to type __m128. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castpd512_pd256">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m512d to type __m256d. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castps128_ps512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castps256_ps512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castps512_ps256">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m512 to type __m256. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castsi128_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m128i" varname="a" etype="M512"/>
+	<description>Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. 
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castsi256_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m256i" varname="a" etype="M512"/>
+	<description>Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined.
+	 This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castsi512_si128">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="a" etype="M128"/>
+	<description>Cast vector of type __m512i to type __m128i.
+	 This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_castsi512_si256">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m256i" varname="dst" etype="M256"/>
+	<parameter type="__m512i" varname="a" etype="M256"/>
+	<description>Cast vector of type __m512i to type __m256i.
+	 This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_zextpd128_pd512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_zextps128_ps512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_zextsi128_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m128i" varname="a" etype="M512"/>
+	<description>Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_zextpd256_pd512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_zextps256_ps512">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_zextsi256_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Cast</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m256i" varname="a" etype="M512"/>
+	<description>Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set1_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set1_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="d" etype="UI32"/>
+	<parameter type="int" varname="c" etype="UI32"/>
+	<parameter type="int" varname="b" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the repeated 4 element sequence.</description>
+	<operation>
+dst[31:0] := a
+dst[63:32] := b
+dst[95:64] := c
+dst[127:96] := d
+dst[159:128] := a
+dst[191:160] := b
+dst[223:192] := c
+dst[255:224] := d
+dst[287:256] := a
+dst[319:288] := b
+dst[351:320] := c
+dst[383:352] := d
+dst[415:384] := a
+dst[447:416] := b
+dst[479:448] := c
+dst[511:480] := d
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set4_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="d" etype="UI64"/>
+	<parameter type="__int64" varname="c" etype="UI64"/>
+	<parameter type="__int64" varname="b" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the repeated 4 element sequence.</description>
+	<operation>
+dst[63:0] := a
+dst[127:64] := b
+dst[191:128] := c
+dst[255:192] := d
+dst[319:256] := a
+dst[383:320] := b
+dst[447:384] := c
+dst[511:448] := d
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set4_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="d" etype="FP64"/>
+	<parameter type="double" varname="c" etype="FP64"/>
+	<parameter type="double" varname="b" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence.</description>
+	<operation>
+dst[63:0] := a
+dst[127:64] := b
+dst[191:128] := c
+dst[255:192] := d
+dst[319:256] := a
+dst[383:320] := b
+dst[447:384] := c
+dst[511:448] := d
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="d" etype="FP32"/>
+	<parameter type="float" varname="c" etype="FP32"/>
+	<parameter type="float" varname="b" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence.</description>
+	<operation>
+dst[31:0] := a
+dst[63:32] := b
+dst[95:64] := c
+dst[127:96] := d
+dst[159:128] := a
+dst[191:160] := b
+dst[223:192] := c
+dst[255:224] := d
+dst[287:256] := a
+dst[319:288] := b
+dst[351:320] := c
+dst[383:352] := d
+dst[415:384] := a
+dst[447:416] := b
+dst[479:448] := c
+dst[511:480] := d
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="e63" etype="UI8"/>
+	<parameter type="char" varname="e62" etype="UI8"/>
+	<parameter type="char" varname="e61" etype="UI8"/>
+	<parameter type="char" varname="e60" etype="UI8"/>
+	<parameter type="char" varname="e59" etype="UI8"/>
+	<parameter type="char" varname="e58" etype="UI8"/>
+	<parameter type="char" varname="e57" etype="UI8"/>
+	<parameter type="char" varname="e56" etype="UI8"/>
+	<parameter type="char" varname="e55" etype="UI8"/>
+	<parameter type="char" varname="e54" etype="UI8"/>
+	<parameter type="char" varname="e53" etype="UI8"/>
+	<parameter type="char" varname="e52" etype="UI8"/>
+	<parameter type="char" varname="e51" etype="UI8"/>
+	<parameter type="char" varname="e50" etype="UI8"/>
+	<parameter type="char" varname="e49" etype="UI8"/>
+	<parameter type="char" varname="e48" etype="UI8"/>
+	<parameter type="char" varname="e47" etype="UI8"/>
+	<parameter type="char" varname="e46" etype="UI8"/>
+	<parameter type="char" varname="e45" etype="UI8"/>
+	<parameter type="char" varname="e44" etype="UI8"/>
+	<parameter type="char" varname="e43" etype="UI8"/>
+	<parameter type="char" varname="e42" etype="UI8"/>
+	<parameter type="char" varname="e41" etype="UI8"/>
+	<parameter type="char" varname="e40" etype="UI8"/>
+	<parameter type="char" varname="e39" etype="UI8"/>
+	<parameter type="char" varname="e38" etype="UI8"/>
+	<parameter type="char" varname="e37" etype="UI8"/>
+	<parameter type="char" varname="e36" etype="UI8"/>
+	<parameter type="char" varname="e35" etype="UI8"/>
+	<parameter type="char" varname="e34" etype="UI8"/>
+	<parameter type="char" varname="e33" etype="UI8"/>
+	<parameter type="char" varname="e32" etype="UI8"/>
+	<parameter type="char" varname="e31" etype="UI8"/>
+	<parameter type="char" varname="e30" etype="UI8"/>
+	<parameter type="char" varname="e29" etype="UI8"/>
+	<parameter type="char" varname="e28" etype="UI8"/>
+	<parameter type="char" varname="e27" etype="UI8"/>
+	<parameter type="char" varname="e26" etype="UI8"/>
+	<parameter type="char" varname="e25" etype="UI8"/>
+	<parameter type="char" varname="e24" etype="UI8"/>
+	<parameter type="char" varname="e23" etype="UI8"/>
+	<parameter type="char" varname="e22" etype="UI8"/>
+	<parameter type="char" varname="e21" etype="UI8"/>
+	<parameter type="char" varname="e20" etype="UI8"/>
+	<parameter type="char" varname="e19" etype="UI8"/>
+	<parameter type="char" varname="e18" etype="UI8"/>
+	<parameter type="char" varname="e17" etype="UI8"/>
+	<parameter type="char" varname="e16" etype="UI8"/>
+	<parameter type="char" varname="e15" etype="UI8"/>
+	<parameter type="char" varname="e14" etype="UI8"/>
+	<parameter type="char" varname="e13" etype="UI8"/>
+	<parameter type="char" varname="e12" etype="UI8"/>
+	<parameter type="char" varname="e11" etype="UI8"/>
+	<parameter type="char" varname="e10" etype="UI8"/>
+	<parameter type="char" varname="e9" etype="UI8"/>
+	<parameter type="char" varname="e8" etype="UI8"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[7:0] := e0
+dst[15:8] := e1
+dst[23:16] := e2
+dst[31:24] := e3
+dst[39:32] := e4
+dst[47:40] := e5
+dst[55:48] := e6
+dst[63:56] := e7
+dst[71:64] := e8
+dst[79:72] := e9
+dst[87:80] := e10
+dst[95:88] := e11
+dst[103:96] := e12
+dst[111:104] := e13
+dst[119:112] := e14
+dst[127:120] := e15
+dst[135:128] := e16
+dst[143:136] := e17
+dst[151:144] := e18
+dst[159:152] := e19
+dst[167:160] := e20
+dst[175:168] := e21
+dst[183:176] := e22
+dst[191:184] := e23
+dst[199:192] := e24
+dst[207:200] := e25
+dst[215:208] := e26
+dst[223:216] := e27
+dst[231:224] := e28
+dst[239:232] := e29
+dst[247:240] := e30
+dst[255:248] := e31
+dst[263:256] := e32
+dst[271:264] := e33
+dst[279:272] := e34
+dst[287:280] := e35
+dst[295:288] := e36
+dst[303:296] := e37
+dst[311:304] := e38
+dst[319:312] := e39
+dst[327:320] := e40
+dst[335:328] := e41
+dst[343:336] := e42
+dst[351:344] := e43
+dst[359:352] := e44
+dst[367:360] := e45
+dst[375:368] := e46
+dst[383:376] := e47
+dst[391:384] := e48
+dst[399:392] := e49
+dst[407:400] := e50
+dst[415:408] := e51
+dst[423:416] := e52
+dst[431:424] := e53
+dst[439:432] := e54
+dst[447:440] := e55
+dst[455:448] := e56
+dst[463:456] := e57
+dst[471:464] := e58
+dst[479:472] := e59
+dst[487:480] := e60
+dst[495:488] := e61
+dst[503:496] := e62
+dst[511:504] := e63
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="e31" etype="UI16"/>
+	<parameter type="short" varname="e30" etype="UI16"/>
+	<parameter type="short" varname="e29" etype="UI16"/>
+	<parameter type="short" varname="e28" etype="UI16"/>
+	<parameter type="short" varname="e27" etype="UI16"/>
+	<parameter type="short" varname="e26" etype="UI16"/>
+	<parameter type="short" varname="e25" etype="UI16"/>
+	<parameter type="short" varname="e24" etype="UI16"/>
+	<parameter type="short" varname="e23" etype="UI16"/>
+	<parameter type="short" varname="e22" etype="UI16"/>
+	<parameter type="short" varname="e21" etype="UI16"/>
+	<parameter type="short" varname="e20" etype="UI16"/>
+	<parameter type="short" varname="e19" etype="UI16"/>
+	<parameter type="short" varname="e18" etype="UI16"/>
+	<parameter type="short" varname="e17" etype="UI16"/>
+	<parameter type="short" varname="e16" etype="UI16"/>
+	<parameter type="short" varname="e15" etype="UI16"/>
+	<parameter type="short" varname="e14" etype="UI16"/>
+	<parameter type="short" varname="e13" etype="UI16"/>
+	<parameter type="short" varname="e12" etype="UI16"/>
+	<parameter type="short" varname="e11" etype="UI16"/>
+	<parameter type="short" varname="e10" etype="UI16"/>
+	<parameter type="short" varname="e9" etype="UI16"/>
+	<parameter type="short" varname="e8" etype="UI16"/>
+	<parameter type="short" varname="e7" etype="UI16"/>
+	<parameter type="short" varname="e6" etype="UI16"/>
+	<parameter type="short" varname="e5" etype="UI16"/>
+	<parameter type="short" varname="e4" etype="UI16"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[15:0] := e0
+dst[31:16] := e1
+dst[47:32] := e2
+dst[63:48] := e3
+dst[79:64] := e4
+dst[95:80] := e5
+dst[111:96] := e6
+dst[127:112] := e7
+dst[143:128] := e8
+dst[159:144] := e9
+dst[175:160] := e10
+dst[191:176] := e11
+dst[207:192] := e12
+dst[223:208] := e13
+dst[239:224] := e14
+dst[255:240] := e15
+dst[271:256] := e16
+dst[287:272] := e17
+dst[303:288] := e18
+dst[319:304] := e19
+dst[335:320] := e20
+dst[351:336] := e21
+dst[367:352] := e22
+dst[383:368] := e23
+dst[399:384] := e24
+dst[415:400] := e25
+dst[431:416] := e26
+dst[447:432] := e27
+dst[463:448] := e28
+dst[479:464] := e29
+dst[495:480] := e30
+dst[511:496] := e31
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="e15" etype="UI32"/>
+	<parameter type="int" varname="e14" etype="UI32"/>
+	<parameter type="int" varname="e13" etype="UI32"/>
+	<parameter type="int" varname="e12" etype="UI32"/>
+	<parameter type="int" varname="e11" etype="UI32"/>
+	<parameter type="int" varname="e10" etype="UI32"/>
+	<parameter type="int" varname="e9" etype="UI32"/>
+	<parameter type="int" varname="e8" etype="UI32"/>
+	<parameter type="int" varname="e7" etype="UI32"/>
+	<parameter type="int" varname="e6" etype="UI32"/>
+	<parameter type="int" varname="e5" etype="UI32"/>
+	<parameter type="int" varname="e4" etype="UI32"/>
+	<parameter type="int" varname="e3" etype="UI32"/>
+	<parameter type="int" varname="e2" etype="UI32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+dst[95:64] := e2
+dst[127:96] := e3
+dst[159:128] := e4
+dst[191:160] := e5
+dst[223:192] := e6
+dst[255:224] := e7
+dst[287:256] := e8
+dst[319:288] := e9
+dst[351:320] := e10
+dst[383:352] := e11
+dst[415:384] := e12
+dst[447:416] := e13
+dst[479:448] := e14
+dst[511:480] := e15
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="e7" etype="UI64"/>
+	<parameter type="__int64" varname="e6" etype="UI64"/>
+	<parameter type="__int64" varname="e5" etype="UI64"/>
+	<parameter type="__int64" varname="e4" etype="UI64"/>
+	<parameter type="__int64" varname="e3" etype="UI64"/>
+	<parameter type="__int64" varname="e2" etype="UI64"/>
+	<parameter type="__int64" varname="e1" etype="UI64"/>
+	<parameter type="__int64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+dst[191:128] := e2
+dst[255:192] := e3
+dst[319:256] := e4
+dst[383:320] := e5
+dst[447:384] := e6
+dst[511:448] := e7
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="e7" etype="FP64"/>
+	<parameter type="double" varname="e6" etype="FP64"/>
+	<parameter type="double" varname="e5" etype="FP64"/>
+	<parameter type="double" varname="e4" etype="FP64"/>
+	<parameter type="double" varname="e3" etype="FP64"/>
+	<parameter type="double" varname="e2" etype="FP64"/>
+	<parameter type="double" varname="e1" etype="FP64"/>
+	<parameter type="double" varname="e0" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+dst[191:128] := e2
+dst[255:192] := e3
+dst[319:256] := e4
+dst[383:320] := e5
+dst[447:384] := e6
+dst[511:448] := e7
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_set_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="e15" etype="FP32"/>
+	<parameter type="float" varname="e14" etype="FP32"/>
+	<parameter type="float" varname="e13" etype="FP32"/>
+	<parameter type="float" varname="e12" etype="FP32"/>
+	<parameter type="float" varname="e11" etype="FP32"/>
+	<parameter type="float" varname="e10" etype="FP32"/>
+	<parameter type="float" varname="e9" etype="FP32"/>
+	<parameter type="float" varname="e8" etype="FP32"/>
+	<parameter type="float" varname="e7" etype="FP32"/>
+	<parameter type="float" varname="e6" etype="FP32"/>
+	<parameter type="float" varname="e5" etype="FP32"/>
+	<parameter type="float" varname="e4" etype="FP32"/>
+	<parameter type="float" varname="e3" etype="FP32"/>
+	<parameter type="float" varname="e2" etype="FP32"/>
+	<parameter type="float" varname="e1" etype="FP32"/>
+	<parameter type="float" varname="e0" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+dst[95:64] := e2
+dst[127:96] := e3
+dst[159:128] := e4
+dst[191:160] := e5
+dst[223:192] := e6
+dst[255:224] := e7
+dst[287:256] := e8
+dst[319:288] := e9
+dst[351:320] := e10
+dst[383:352] := e11
+dst[415:384] := e12
+dst[447:416] := e13
+dst[479:448] := e14
+dst[511:480] := e15
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr4_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="d" etype="UI32"/>
+	<parameter type="int" varname="c" etype="UI32"/>
+	<parameter type="int" varname="b" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the repeated 4 element sequence in reverse order.</description>
+	<operation>
+dst[31:0] := d
+dst[63:32] := c
+dst[95:64] := b
+dst[127:96] := a
+dst[159:128] := d
+dst[191:160] := c
+dst[223:192] := b
+dst[255:224] := a
+dst[287:256] := d
+dst[319:288] := c
+dst[351:320] := b
+dst[383:352] := a
+dst[415:384] := d
+dst[447:416] := c
+dst[479:448] := b
+dst[511:480] := a
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr4_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="d" etype="UI64"/>
+	<parameter type="__int64" varname="c" etype="UI64"/>
+	<parameter type="__int64" varname="b" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the repeated 4 element sequence in reverse order.</description>
+	<operation>
+dst[63:0] := d
+dst[127:64] := c
+dst[191:128] := b
+dst[255:192] := a
+dst[319:256] := d
+dst[383:320] := c
+dst[447:384] := b
+dst[511:448] := a
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr4_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="d" etype="FP64"/>
+	<parameter type="double" varname="c" etype="FP64"/>
+	<parameter type="double" varname="b" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order.</description>
+	<operation>
+dst[63:0] := d
+dst[127:64] := c
+dst[191:128] := b
+dst[255:192] := a
+dst[319:256] := d
+dst[383:320] := c
+dst[447:384] := b
+dst[511:448] := a
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr4_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="d" etype="FP32"/>
+	<parameter type="float" varname="c" etype="FP32"/>
+	<parameter type="float" varname="b" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the repeated 4 element sequence in reverse order.</description>
+	<operation>
+dst[31:0] := d
+dst[63:32] := c
+dst[95:64] := b
+dst[127:96] := a
+dst[159:128] := d
+dst[191:160] := c
+dst[223:192] := b
+dst[255:224] := a
+dst[287:256] := d
+dst[319:288] := c
+dst[351:320] := b
+dst[383:352] := a
+dst[415:384] := d
+dst[447:416] := c
+dst[479:448] := b
+dst[511:480] := a
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="e15" etype="UI32"/>
+	<parameter type="int" varname="e14" etype="UI32"/>
+	<parameter type="int" varname="e13" etype="UI32"/>
+	<parameter type="int" varname="e12" etype="UI32"/>
+	<parameter type="int" varname="e11" etype="UI32"/>
+	<parameter type="int" varname="e10" etype="UI32"/>
+	<parameter type="int" varname="e9" etype="UI32"/>
+	<parameter type="int" varname="e8" etype="UI32"/>
+	<parameter type="int" varname="e7" etype="UI32"/>
+	<parameter type="int" varname="e6" etype="UI32"/>
+	<parameter type="int" varname="e5" etype="UI32"/>
+	<parameter type="int" varname="e4" etype="UI32"/>
+	<parameter type="int" varname="e3" etype="UI32"/>
+	<parameter type="int" varname="e2" etype="UI32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e15
+dst[63:32] := e14
+dst[95:64] := e13
+dst[127:96] := e12
+dst[159:128] := e11
+dst[191:160] := e10
+dst[223:192] := e9
+dst[255:224] := e8
+dst[287:256] := e7
+dst[319:288] := e6
+dst[351:320] := e5
+dst[383:352] := e4
+dst[415:384] := e3
+dst[447:416] := e2
+dst[479:448] := e1
+dst[511:480] := e0
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="e7" etype="UI64"/>
+	<parameter type="__int64" varname="e6" etype="UI64"/>
+	<parameter type="__int64" varname="e5" etype="UI64"/>
+	<parameter type="__int64" varname="e4" etype="UI64"/>
+	<parameter type="__int64" varname="e3" etype="UI64"/>
+	<parameter type="__int64" varname="e2" etype="UI64"/>
+	<parameter type="__int64" varname="e1" etype="UI64"/>
+	<parameter type="__int64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[63:0] := e7
+dst[127:64] := e6
+dst[191:128] := e5
+dst[255:192] := e4
+dst[319:256] := e3
+dst[383:320] := e2
+dst[447:384] := e1
+dst[511:448] := e0
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="e7" etype="FP64"/>
+	<parameter type="double" varname="e6" etype="FP64"/>
+	<parameter type="double" varname="e5" etype="FP64"/>
+	<parameter type="double" varname="e4" etype="FP64"/>
+	<parameter type="double" varname="e3" etype="FP64"/>
+	<parameter type="double" varname="e2" etype="FP64"/>
+	<parameter type="double" varname="e1" etype="FP64"/>
+	<parameter type="double" varname="e0" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[63:0] := e7
+dst[127:64] := e6
+dst[191:128] := e5
+dst[255:192] := e4
+dst[319:256] := e3
+dst[383:320] := e2
+dst[447:384] := e1
+dst[511:448] := e0
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_setr_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="e15" etype="FP32"/>
+	<parameter type="float" varname="e14" etype="FP32"/>
+	<parameter type="float" varname="e13" etype="FP32"/>
+	<parameter type="float" varname="e12" etype="FP32"/>
+	<parameter type="float" varname="e11" etype="FP32"/>
+	<parameter type="float" varname="e10" etype="FP32"/>
+	<parameter type="float" varname="e9" etype="FP32"/>
+	<parameter type="float" varname="e8" etype="FP32"/>
+	<parameter type="float" varname="e7" etype="FP32"/>
+	<parameter type="float" varname="e6" etype="FP32"/>
+	<parameter type="float" varname="e5" etype="FP32"/>
+	<parameter type="float" varname="e4" etype="FP32"/>
+	<parameter type="float" varname="e3" etype="FP32"/>
+	<parameter type="float" varname="e2" etype="FP32"/>
+	<parameter type="float" varname="e1" etype="FP32"/>
+	<parameter type="float" varname="e0" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e15
+dst[63:32] := e14
+dst[95:64] := e13
+dst[127:96] := e12
+dst[159:128] := e11
+dst[191:160] := e10
+dst[223:192] := e9
+dst[255:224] := e8
+dst[287:256] := e7
+dst[319:288] := e6
+dst[351:320] := e5
+dst[383:352] := e4
+dst[415:384] := e3
+dst[447:416] := e2
+dst[479:448] := e1
+dst[511:480] := e0
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_setzero">
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m512 with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_setzero_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<description>Return vector of type __m512i with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_setzero_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<description>Return vector of type __m512d with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_setzero_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<description>Return vector of type __m512 with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_setzero_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Set</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<description>Return vector of type __m512i with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_undefined">
+	<CPUID>AVX512F</CPUID>
+	<category>General Support</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m512 with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_undefined_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>General Support</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<description>Return vector of type __m512i with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_undefined_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>General Support</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<description>Return vector of type __m512d with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_undefined_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>General Support</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<description>Return vector of type __m512 with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_acos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ACOS(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_acos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ACOS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_acos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ACOS(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_acos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ACOS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_acosh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ACOSH(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_acosh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ACOSH(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_acosh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ACOSH(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_acosh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ACOSH(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_asin_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ASIN(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_asin_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ASIN(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_asin_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ASIN(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_asin_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ASIN(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_asinh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ASINH(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_asinh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ASINH(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_asinh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ASINH(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_asinh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ASINH(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_atan2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_atan2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_atan2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_atan2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_atan_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ATAN(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_atan_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ATAN(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_atan_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ATAN(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_atan_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ATAN(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_atanh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ATANH(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_atanh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ATANH(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_atanh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ATANH(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_atanh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" expressed in radians using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ATANH(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cbrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := CubeRoot(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cbrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := CubeRoot(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cbrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := CubeRoot(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cbrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := CubeRoot(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cdfnorm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := CDFNormal(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cdfnorm_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := CDFNormal(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cdfnorm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := CDFNormal(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cdfnorm_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := CDFNormal(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cdfnorminv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := InverseCDFNormal(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cdfnorminv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := InverseCDFNormal(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cdfnorminv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := InverseCDFNormal(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cdfnorminv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := InverseCDFNormal(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_ceil_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := CEIL(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_ceil_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := CEIL(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_ceil_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := CEIL(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_ceil_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := CEIL(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := COS(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := COS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := COS(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := COS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cosd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := COSD(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cosd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := COSD(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cosd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := COSD(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cosd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := COSD(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cosh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := COSH(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cosh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := COSH(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_cosh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := COSH(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_cosh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := COSH(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erf_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ERF(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erf_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ERF(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erfc_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := 1.0 - ERF(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erfc_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := 1.0 - ERF(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erf_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ERF(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erf_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ERF(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erfc_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := 1.0 - ERF(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erfc_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := 1.0 - ERF(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erfinv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := 1.0 / ERF(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erfinv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := 1.0 / ERF(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erfinv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := 1.0 / ERF(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erfinv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := 1.0 / ERF(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erfcinv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erfcinv_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i]))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_erfcinv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_erfcinv_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_exp10_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(10.0, a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_exp10_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(10.0, a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_exp10_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(FP32(10.0), a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_exp10_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(10.0), a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_exp2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(2.0, a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_exp2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(2.0, a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_exp2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_exp2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_exp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(e, a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_exp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(e, a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_exp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(FP32(e), a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_exp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(e), a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_expm1_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(e, a[i+63:i]) - 1.0
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_expm1_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(e, a[i+63:i]) - 1.0
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_expm1_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_expm1_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_floor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := FLOOR(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_floor_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FLOOR(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_floor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FLOOR(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_floor_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FLOOR(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_hypot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_hypot_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_hypot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_hypot_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF b[i+31:i] == 0
+		#DE
+	FI
+	dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_div_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		IF b[i+31:i] == 0
+			#DE
+		FI
+		dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="SI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := 8*j
+	IF b[i+7:i] == 0
+		#DE
+	FI
+	dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	IF b[i+15:i] == 0
+		#DE
+	FI
+	dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<parameter type="__m512i" varname="b" etype="SI64"/>
+	<description>Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	IF b[i+63:i] == 0
+		#DE
+	FI
+	dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_invsqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := InvSQRT(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_invsqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := InvSQRT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_invsqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := InvSQRT(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_invsqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := InvSQRT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_rem_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 63
+	i := 8*j
+	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 31
+	i := 16*j
+	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 64*j
+	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log10_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log10_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0)
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log10_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log10_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log1p_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := LOG(1.0 + a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log1p_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LOG(1.0 + a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log1p_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LOG(1.0 + a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log1p_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LOG(1.0 + a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log2_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0)
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := LOG(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_log_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_log_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LOG(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_logb_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_logb_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_logb_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_logb_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_nearbyint_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Rounds each packed double-precision (64-bit) floating-point element in "a" to the nearest integer value and stores the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := NearbyInt(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_nearbyint_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Rounds each packed double-precision (64-bit) floating-point element in "a" to the nearest integer value and stores the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := NearbyInt(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_nearbyint_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Rounds each packed single-precision (32-bit) floating-point element in "a" to the nearest integer value and stores the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := NearbyInt(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_nearbyint_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Rounds each packed single-precision (32-bit) floating-point element in "a" to the nearest integer value and stores the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := NearbyInt(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_pow_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POW(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_pow_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POW(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_pow_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POW(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_pow_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POW(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_recip_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Computes the reciprocal of packed double-precision (64-bit) floating-point elements in "a", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (1.0 / a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_recip_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Computes the reciprocal of packed double-precision (64-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (1.0 / a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_recip_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Computes the reciprocal of packed single-precision (32-bit) floating-point elements in "a", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_recip_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Computes the reciprocal of packed single-precision (32-bit) floating-point elements in "a", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rint_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Rounds the packed double-precision (64-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := RoundToNearestEven(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_rint_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Rounds the packed double-precision (64-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := RoundToNearestEven(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rint_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Rounds the packed single-precision (32-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := RoundToNearestEven(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_rint_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Rounds the packed single-precision (32-bit) floating-point elements in "a" to the nearest even integer value and stores the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := RoundToNearestEven(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_svml_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ROUND(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_svml_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ROUND(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i] 
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sin_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SIN(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sin_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SIN(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sin_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SIN(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sin_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SIN(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sinh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SINH(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sinh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SINH(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sinh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SINH(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sinh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SINH(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sind_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SIND(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sind_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SIND(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sind_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SIND(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sind_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SIND(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_tan_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := TAN(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_tan_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := TAN(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_tan_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := TAN(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_tan_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := TAN(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_tand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := TAND(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_tand_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := TAND(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_tand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := TAND(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_tand_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := TAND(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_tanh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := TANH(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_tanh_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := TANH(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_tanh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := TANH(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_tanh_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := TANH(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_trunc_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := TRUNCATE(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_trunc_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := TRUNCATE(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_trunc_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := TRUNCATE(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_trunc_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := TRUNCATE(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF b[i+31:i] == 0
+		#DE
+	FI
+	dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_div_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		IF b[i+31:i] == 0
+			#DE
+		FI
+		dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := 8*j
+	IF b[i+7:i] == 0
+		#DE
+	FI
+	dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := 16*j
+	IF b[i+15:i] == 0
+		#DE
+	FI
+	dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_div_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 64*j
+	IF b[i+63:i] == 0
+		#DE
+	FI
+	dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_rem_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := 32*j
+	IF k[j]
+		dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epu8">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 63
+	i := 8*j
+	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epu16">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<description>Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 31
+	i := 16*j
+	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_rem_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 64*j
+	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kortestz">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Performs bitwise OR between "k1" and "k2", storing the result in "dst". ZF flag is set if "dst" is 0.</description>
+	<operation>dst[15:0] := k1[15:0] | k2[15:0]
+IF dst == 0
+	SetZF()
+FI
+	</operation>
+	<instruction name="KORTESTW" form="k, k" xed="KORTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_kortestc">
+	<type>Mask</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Performs bitwise OR between "k1" and "k2", storing the result in "dst". CF flag is set if "dst" consists of all 1's.</description>
+	<operation>dst[15:0] := k1[15:0] | k2[15:0]
+IF PopCount(dst[15:0]) == 16
+	SetCF()
+FI
+	</operation>
+	<instruction name="KORTESTW" form="k, k" xed="KORTESTW_MASKmskw_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2int">
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<description>Converts bit mask "k1" into an integer value, storing the results in "dst".</description>
+	<operation>
+dst := ZeroExtend32(k1)
+	</operation>
+	<instruction name="KMOVW" form="r32, k" xed="KMOVW_GPR32u32_MASKmskw_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_int2mask">
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="int" varname="mask" etype="UI16"/>
+	<description>Converts integer "mask" into bitmask, storing the result in "dst".</description>
+	<operation>
+dst := mask[15:0]
+	</operation>
+	<instruction name="KMOVW" form="k, r32" xed="KMOVW_MASKmskw_GPR32u32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_mullox_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] * b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" sequence="TRUE" name="_mm512_mask_mullox_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Store</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Multiplies elements in packed 64-bit integer vectors "a" and "b" together, storing the lower 64 bits of the result in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sincos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d *" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := SIN(a[i+63:i])
+	MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+cos_res[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sincos_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d *" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="sin_src" etype="FP64"/>
+	<parameter type="__m512d" varname="cos_src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := SIN(a[i+63:i])
+		MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i])
+	ELSE
+		dst[i+63:i] := sin_src[i+63:i]
+		MEM[mem_addr+i+63:mem_addr+i] := cos_src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+cos_res[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_sincos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512 *" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := SIN(a[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+cos_res[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm512_mask_sincos_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512 *" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="sin_src" etype="FP32"/>
+	<parameter type="__m512" varname="cos_src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr". Elements are written to their respective locations using writemask "k" (elements are copied from "sin_src" or "cos_src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := SIN(a[i+31:i])
+		MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i])
+	ELSE
+		dst[i+31:i] := sin_src[i+31:i]
+		MEM[mem_addr+i+31:mem_addr+i] := cos_src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+cos_res[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_cvtss_f32">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Copy the lower single-precision (32-bit) floating-point element of "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="VMOVSS" form="m32, xmm" xed="VMOVSS_MEMf32_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_cvtsd_f64">
+	<type>Floating Point</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Copy the lower double-precision (64-bit) floating-point element of "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="VMOVSD" form="m64, xmm" xed="VMOVSD_MEMq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_cvtsi512_si32">
+	<type>Integer</type>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Copy the lower 32-bit integer in "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="VMOVD" form="r32, xmm" xed="VMOVD_GPR32u32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPD" form="zmm, zmm, zmm" xed="VADDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_add_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPD" form="zmm, zmm, zmm {er}" xed="VADDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPD" form="zmm {k}, zmm, zmm" xed="VADDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_add_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPD" form="zmm {k}, zmm, zmm {er}" xed="VADDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPS" form="zmm, zmm, zmm" xed="VADDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_add_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPS" form="zmm, zmm, zmm {er}" xed="VADDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPS" form="zmm {k}, zmm, zmm" xed="VADDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_add_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDPS" form="zmm {k}, zmm, zmm {er}" xed="VADDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Concatenate "a" and "b" into a 128-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst".</description>
+	<operation>
+temp[1023:512] := a[511:0]
+temp[511:0] := b[511:0]
+temp[1023:0] := temp[1023:0] &gt;&gt; (32*imm8[3:0])
+dst[511:0] := temp[511:0]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VALIGND" form="zmm, zmm, zmm, imm8" xed="VALIGND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_alignr_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Concatenate "a" and "b" into a 128-byte intermediate result, shift the result right by "imm8" 32-bit elements, and store the low 64 bytes (16 elements) in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+temp[1023:512] := a[511:0]
+temp[511:0] := b[511:0]
+temp[1023:0] := temp[1023:0] &gt;&gt; (32*imm8[3:0])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := temp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VALIGND" form="zmm {k}, zmm, zmm, imm8" xed="VALIGND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_blend_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBLENDMPD" form="zmm {k}, zmm, zmm" xed="VBLENDMPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_blend_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VBLENDMPS" form="zmm {k}, zmm, zmm" xed="VBLENDMPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmp_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmp_round_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] OP b[i+63:i]) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm {sae}, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpeq_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0
+ENDFOR	
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmple_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] &lt;= b[i+63:i]) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmplt_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] &lt; b[i+63:i]) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpneq_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpnle_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (!(a[i+63:i] &lt;= b[i+63:i])) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpnlt_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k[j] := (!(a[i+63:i] &lt; b[i+63:i])) ? 1 : 0
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpord_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0 
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpunord_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0 
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmp_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmp_round_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := ( a[i+63:i] OP b[i+63:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm {sae}, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpeq_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (a[i+63:i] == b[i+63:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR	
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmple_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (a[i+63:i] &lt;= b[i+63:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmplt_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (a[i+63:i] &lt; b[i+63:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpneq_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (a[i+63:i] != b[i+63:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpnle_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (!(a[i+63:i] &lt;= b[i+63:i])) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpnlt_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (!(a[i+63:i] &lt; b[i+63:i])) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpord_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpunord_pd_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__mmask8" varname="k1" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k1[j]
+		k[j] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:8] := 0
+	</operation>
+	<instruction name="VCMPPD" form="k {k}, zmm, zmm, imm8" xed="VCMPPD_MASKmskw_MASKmskw_ZMMf64_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmp_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmp_round_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k". [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (a[i+31:i] OP b[i+31:i]) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm {sae}, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpeq_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0
+ENDFOR	
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmple_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (a[i+31:i] &lt;= b[i+31:i]) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmplt_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (a[i+31:i] &lt; b[i+31:i]) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpneq_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpnle_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (!(a[i+31:i] &lt;= b[i+31:i])) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpnlt_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := (!(a[i+31:i] &lt; b[i+31:i])) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpord_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpunord_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmp_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmp_round_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immtype="_CMP_"/>
+	<parameter type="const int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]</description>
+	<operation>CASE (imm8[4:0]) OF
+0: OP := _CMP_EQ_OQ
+1: OP := _CMP_LT_OS
+2: OP := _CMP_LE_OS
+3: OP := _CMP_UNORD_Q 
+4: OP := _CMP_NEQ_UQ
+5: OP := _CMP_NLT_US
+6: OP := _CMP_NLE_US
+7: OP := _CMP_ORD_Q
+8: OP := _CMP_EQ_UQ
+9: OP := _CMP_NGE_US
+10: OP := _CMP_NGT_US
+11: OP := _CMP_FALSE_OQ
+12: OP := _CMP_NEQ_OQ
+13: OP := _CMP_GE_OS
+14: OP := _CMP_GT_OS
+15: OP := _CMP_TRUE_UQ
+16: OP := _CMP_EQ_OS
+17: OP := _CMP_LT_OQ
+18: OP := _CMP_LE_OQ
+19: OP := _CMP_UNORD_S
+20: OP := _CMP_NEQ_US
+21: OP := _CMP_NLT_UQ
+22: OP := _CMP_NLE_UQ
+23: OP := _CMP_ORD_S
+24: OP := _CMP_EQ_US
+25: OP := _CMP_NGE_UQ 
+26: OP := _CMP_NGT_UQ 
+27: OP := _CMP_FALSE_OS 
+28: OP := _CMP_NEQ_OS 
+29: OP := _CMP_GE_OQ
+30: OP := _CMP_GT_OQ
+31: OP := _CMP_TRUE_US
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm {sae}, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpeq_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := (a[i+31:i] == b[i+31:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR		
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmple_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := (a[i+31:i] &lt;= b[i+31:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmplt_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := (a[i+31:i] &lt; b[i+31:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpneq_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := (a[i+31:i] != b[i+31:i]) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpnle_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := (!(a[i+31:i] &lt;= b[i+31:i])) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpnlt_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := (!(a[i+31:i] &lt; b[i+31:i])) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpord_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] != NaN) AND (b[i+31:i] != NaN)) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpunord_ps_mask">
+	<type>Floating Point</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] == NaN) OR (b[i+31:i] == NaN)) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VCMPPS" form="k {k}, zmm, zmm, imm8" xed="VCMPPS_MASKmskw_MASKmskw_ZMMf32_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm, zmm, zmm" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm, zmm, zmm" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm, zmm, zmm" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm, zmm, zmm {er}" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm, zmm, zmm {er}" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm, zmm, zmm {er}" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm {k}, zmm, zmm" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm {k}, zmm, zmm" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm {k}, zmm, zmm" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE 
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm {k}, zmm, zmm" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm {k}, zmm, zmm" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm {k}, zmm, zmm" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMADD231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm, zmm, zmm" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm, zmm, zmm" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm, zmm, zmm" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm, zmm, zmm {er}" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm, zmm, zmm {er}" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm, zmm, zmm {er}" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm {k}, zmm, zmm" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm {k}, zmm, zmm" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm {k}, zmm, zmm" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm {k}, zmm, zmm" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm {k}, zmm, zmm" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm {k}, zmm, zmm" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMADD231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm, zmm, zmm" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm, zmm, zmm" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm, zmm, zmm" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm, zmm, zmm {er}" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm, zmm, zmm {er}" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm, zmm, zmm {er}" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm {k}, zmm, zmm" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm {k}, zmm, zmm" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm {k}, zmm, zmm" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm {k}, zmm, zmm" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm {k}, zmm, zmm" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm {k}, zmm, zmm" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB213PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFMSUB231PD" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm, zmm, zmm" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm, zmm, zmm" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm, zmm, zmm" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm, zmm, zmm {er}" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm, zmm, zmm {er}" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm, zmm, zmm {er}" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm {k}, zmm, zmm" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm {k}, zmm, zmm" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm {k}, zmm, zmm" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm {k}, zmm, zmm" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm {k}, zmm, zmm" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm {k}, zmm, zmm" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB213PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFMSUB231PS" form="zmm {k}, zmm, zmm {er}" xed="VFMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm, zmm, zmm" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm, zmm, zmm" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm, zmm, zmm" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".
+	 [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm, zmm, zmm {er}" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm, zmm, zmm {er}" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm, zmm, zmm {er}" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm {k}, zmm, zmm" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm {k}, zmm, zmm" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm {k}, zmm, zmm" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm {k}, zmm, zmm" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm {k}, zmm, zmm" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm {k}, zmm, zmm" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmadd_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD213PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMADD231PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm, zmm, zmm" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm, zmm, zmm" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm, zmm, zmm" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".  
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm, zmm, zmm {er}" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm, zmm, zmm {er}" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm, zmm, zmm {er}" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm {k}, zmm, zmm" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm {k}, zmm, zmm" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm {k}, zmm, zmm" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm {k}, zmm, zmm" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm {k}, zmm, zmm" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm {k}, zmm, zmm" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmadd_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD213PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMADD231PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMADD231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm, zmm, zmm" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm, zmm, zmm" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm, zmm, zmm" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".  
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm, zmm, zmm {er}" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm, zmm, zmm {er}" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm, zmm, zmm {er}" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm {k}, zmm, zmm" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm {k}, zmm, zmm" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm {k}, zmm, zmm" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set). [round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := c[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm {k}, zmm, zmm" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm {k}, zmm, zmm" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm {k}, zmm, zmm" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmsub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="__m512d" varname="c" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB132PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB213PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB213PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<instruction name="VFNMSUB231PD" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB231PD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm, zmm, zmm" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm, zmm, zmm" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm, zmm, zmm" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_fnmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm, zmm, zmm {er}" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm, zmm, zmm {er}" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm, zmm, zmm {er}" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm {k}, zmm, zmm" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm {k}, zmm, zmm" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm {k}, zmm, zmm" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask3_fnmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).  [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm {k}, zmm, zmm" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm {k}, zmm, zmm" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm {k}, zmm, zmm" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_fnmsub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="__m512" varname="c" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set). 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR	
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB132PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB213PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB213PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<instruction name="VFNMSUB231PS" form="zmm {k}, zmm, zmm {er}" xed="VFNMSUB231PS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="zmm, vm32z" xed="VGATHERDPS_ZMMf32_MASKmskw_MEMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="zmm {k}, vm32z" xed="VGATHERDPS_ZMMf32_MASKmskw_MEMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="zmm, zmm" xed="VGETEXPPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getexp_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.
+	[sae_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="zmm, zmm {sae}" xed="VGETEXPPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getexp_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="zmm {k}, zmm" xed="VGETEXPPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getexp_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
+	[sae_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPD" form="zmm {k}, zmm {sae}" xed="VGETEXPPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="zmm, zmm" xed="VGETEXPPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getexp_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.
+	[sae_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="zmm, zmm {sae}" xed="VGETEXPPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getexp_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="zmm {k}, zmm" xed="VGETEXPPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getexp_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "floor(log2(x))" for each element.
+	[sae_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETEXPPS" form="zmm {k}, zmm {sae}" xed="VGETEXPPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="zmm, zmm, imm8" xed="VGETMANTPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getmant_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="zmm, zmm, imm8 {sae}" xed="VGETMANTPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getmant_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="zmm {k}, zmm, imm8" xed="VGETMANTPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getmant_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissas of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := GetNormalizedMantissa(a[i+63:i], sc, interv)
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPD" form="zmm {k}, zmm, imm8 {sae}" xed="VGETMANTPD_ZMMf64_MASKmskw_ZMMf64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="zmm, zmm, imm8" xed="VGETMANTPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_getmant_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="zmm, zmm, imm8 {sae}" xed="VGETMANTPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getmant_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="zmm {k}, zmm, imm8" xed="VGETMANTPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_getmant_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_MANTISSA_NORM_ENUM" varname="interv" etype="IMM" immtype="_MM_MANTISSA_NORM"/>
+	<parameter type="_MM_MANTISSA_SIGN_ENUM" varname="sc" etype="IMM" immtype="_MM_MANTISSA_SIGN"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Normalize the mantissas of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). This intrinsic essentially calculates "±(2^k)*|x.significand|", where "k" depends on the interval range defined by "interv" and the sign depends on "sc" and the source sign.
+	[getmant_note][sae_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := GetNormalizedMantissa(a[i+31:i], sc, interv)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGETMANTPS" form="zmm {k}, zmm, imm8 {sae}" xed="VGETMANTPS_ZMMf32_MASKmskw_ZMMf32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into "dst". 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm, m512" xed="VMOVAPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_load_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<description>Load packed double-precision (64-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm {k}, m512" xed="VMOVAPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mov_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Move</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Move packed double-precision (64-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm {k}, zmm" xed="VMOVAPD_ZMMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_store_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Store packed double-precision (64-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPD" form="m512 {k}, zmm" xed="VMOVAPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_store_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from "a" into memory.
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVAPD" form="m512, zmm" xed="VMOVAPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into "dst". 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm, m512" xed="VMOVAPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_load_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<description>Load packed single-precision (32-bit) floating-point elements from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm {k}, m512" xed="VMOVAPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mov_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Move</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Move packed single-precision (32-bit) floating-point elements from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm {k}, zmm" xed="VMOVAPS_ZMMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_store_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Store packed single-precision (32-bit) floating-point elements from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPS" form="m512 {k}, zmm" xed="VMOVAPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_store_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from "a" into memory. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVAPS" form="m512, zmm" xed="VMOVAPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load 512-bits (composed of 16 packed 32-bit integers) from memory into "dst". 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm, m512" xed="VMOVDQA32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_load_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="void const*" varname="mem_addr" etype="M512" memwidth="512"/>
+	<description>Load 512-bits of integer data from memory into "dst". 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm, m512" xed="VMOVDQA32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_load_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<description>Load packed 32-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MEM[mem_addr+i+31:mem_addr+i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm {k}, m512" xed="VMOVDQA32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mov_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Move packed 32-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm {k}, zmm" xed="VMOVDQA32_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_store_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Store packed 32-bit integers from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA32" form="m512 {k}, zmm" xed="VMOVDQA32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_store_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Store 512-bits (composed of 16 packed 32-bit integers) from "a" into memory. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQA32" form="m512, zmm" xed="VMOVDQA32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_store_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="M512" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<description>Store 512-bits of integer data from "a" into memory. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQA32" form="m512, zmm" xed="VMOVDQA32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load 512-bits (composed of 8 packed 64-bit integers) from memory into "dst". 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[511:0] := MEM[mem_addr+511:mem_addr]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm, m512" xed="VMOVDQA64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_load_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<description>Load packed 64-bit integers from memory into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := MEM[mem_addr+i+63:mem_addr+i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm {k}, m512" xed="VMOVDQA64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mov_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Move</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Move packed 64-bit integers from "a" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm {k}, zmm" xed="VMOVDQA64_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_store_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Store packed 64-bit integers from "a" into memory using writemask "k".
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA64" form="m512 {k}, zmm" xed="VMOVDQA64_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_store_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="512"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Store 512-bits (composed of 8 packed 64-bit integers) from "a" into memory. 
+	"mem_addr" must be aligned on a 64-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+511:mem_addr] := a[511:0]
+	</operation>
+	<instruction name="VMOVDQA64" form="m512, zmm" xed="VMOVDQA64_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPD" form="zmm {k}, zmm, zmm" xed="VMULPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mul_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).  
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] * b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPD" form="zmm {k}, zmm, zmm {er}" xed="VMULPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] * b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPD" form="zmm, zmm, zmm" xed="VMULPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mul_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] * b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPD" form="zmm, zmm, zmm {er}" xed="VMULPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPS" form="zmm {k}, zmm, zmm" xed="VMULPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mul_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	 [round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPS" form="zmm {k}, zmm, zmm {er}" xed="VMULPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPS" form="zmm, zmm, zmm" xed="VMULPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mul_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst". 
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMULPS" form="zmm, zmm, zmm {er}" xed="VMULPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDD" form="zmm, zmm, zmm" xed="VPADDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDD" form="zmm {k}, zmm, zmm" xed="VPADDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] AND b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDD" form="zmm, zmm, zmm" xed="VPANDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_and_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<parameter type="__m512i" varname="b" etype="M512"/>
+	<description>Compute the bitwise AND of 512 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[511:0] := (a[511:0] AND b[511:0])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDD" form="zmm, zmm, zmm" xed="VPANDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (NOT a[i+31:i]) AND b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDND" form="zmm, zmm, zmm" xed="VPANDND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_andnot_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<parameter type="__m512i" varname="b" etype="M512"/>
+	<description>Compute the bitwise NOT of 512 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst".</description>
+	<operation>
+dst[511:0] := ((NOT a[511:0]) AND b[511:0])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDND" form="zmm, zmm, zmm" xed="VPANDND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_andnot_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of packed 32-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDND" form="zmm {k}, zmm, zmm" xed="VPANDND_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+dst[511:0] := ((NOT a[511:0]) AND b[511:0])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="zmm, zmm, zmm" xed="VPANDNQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_andnot_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of packed 64-bit integers in "a" and then AND with "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDNQ" form="zmm {k}, zmm, zmm" xed="VPANDNQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst[511:0] := (a[511:0] AND b[511:0])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDQ" form="zmm, zmm, zmm" xed="VPANDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDQ" form="zmm {k}, zmm, zmm" xed="VPANDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_blend_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Blend packed 32-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBLENDMD" form="zmm {k}, zmm, zmm" xed="VPBLENDMD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_blend_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Blend packed 64-bit integers from "a" and "b" using control mask "k", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPBLENDMQ" form="zmm {k}, zmm, zmm" xed="VPBLENDMQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmp_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpeq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPEQD" form="k, zmm, zmm" xed="VPCMPEQD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpge_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpgt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPGTD" form="k, zmm, zmm" xed="VPCMPGTD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmple_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpneq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmp_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpeq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPEQD" form="k {k}, zmm, zmm" xed="VPCMPEQD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpge_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpgt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPGTD" form="k {k}, zmm, zmm" xed="VPCMPGTD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmple_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpneq_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPD" form="k {k}, zmm, zmm, imm8" xed="VPCMPD_MASKmskw_MASKmskw_ZMMi32_ZMMi32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmp_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k".</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpeq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpge_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpgt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmple_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmplt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cmpneq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmp_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="_MM_CMPINT_ENUM" varname="imm8" etype="IMM" immtype="_MM_CMPINT"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" based on the comparison operand specified by "imm8", and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>CASE (imm8[2:0]) OF
+0: OP := _MM_CMPINT_EQ
+1: OP := _MM_CMPINT_LT
+2: OP := _MM_CMPINT_LE
+3: OP := _MM_CMPINT_FALSE
+4: OP := _MM_CMPINT_NE
+5: OP := _MM_CMPINT_NLT
+6: OP := _MM_CMPINT_NLE
+7: OP := _MM_CMPINT_TRUE
+ESAC
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] OP b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpeq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for equality, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] == b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpge_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpgt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for greater-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &gt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmple_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than-or-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmplt_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cmpneq_epu32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b" for not-equal, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] != b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPUD" form="k {k}, zmm, zmm, imm8" xed="VPCMPUD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_permutevar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_mask_permutexvar_epi32", and it is recommended that you use that intrinsic name.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	IF k[j]
+		dst[i+31:i] := a[id+31:id]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMD" form="zmm {k}, zmm, zmm" xed="VPERMD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_permutevar_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="idx" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst". Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the "permutevar" name. This intrinsic is identical to "_mm512_permutexvar_epi32", and it is recommended that you use that intrinsic name.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	id := idx[i+3:i]*32
+	dst[i+31:i] := a[id+31:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMD" form="zmm, zmm, zmm" xed="VPERMD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="zmm, vm32z" xed="VPGATHERDD_ZMMu32_MASKmskw_MEMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32gather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="zmm {k}, vm32z" xed="VPGATHERDD_ZMMu32_MASKmskw_MEMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="zmm {k}, zmm, zmm" xed="VPMAXSD_ZMMi32_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXSD" form="zmm, zmm, zmm" xed="VPMAXSD_ZMMi32_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="zmm {k}, zmm, zmm" xed="VPMAXUD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMAXUD" form="zmm, zmm, zmm" xed="VPMAXUD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSD" form="zmm {k}, zmm, zmm" xed="VPMINSD_ZMMi32_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINSD" form="zmm, zmm, zmm" xed="VPMINSD_ZMMi32_MASKmskw_ZMMi32_ZMMi32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUD" form="zmm {k}, zmm, zmm" xed="VPMINUD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMINUD" form="zmm, zmm, zmm" xed="VPMINUD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[63:0] := a[i+31:i] * b[i+31:i]
+		dst[i+31:i] := tmp[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLD" form="zmm {k}, zmm, zmm" xed="VPMULLD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	tmp[63:0] := a[i+31:i] * b[i+31:i]
+	dst[i+31:i] := tmp[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULLD" form="zmm, zmm, zmm" xed="VPMULLD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORD" form="zmm {k}, zmm, zmm" xed="VPORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise OR of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORD" form="zmm, zmm, zmm" xed="VPORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_or_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<parameter type="__m512i" varname="b" etype="M512"/>
+	<description>Compute the bitwise OR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[511:0] := (a[511:0] OR b[511:0])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORD" form="zmm, zmm, zmm" xed="VPORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORQ" form="zmm {k}, zmm, zmm" xed="VPORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPORQ" form="zmm, zmm, zmm" xed="VPORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="vm32z, zmm" xed="VPSCATTERDD_MEMu32_MASKmskw_ZMMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32scatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter 32-bit integers from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="vm32z {k}, zmm" xed="VPSCATTERDD_MEMu32_MASKmskw_ZMMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+tmp_dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+tmp_dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+tmp_dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+tmp_dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+tmp_dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+tmp_dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+tmp_dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+tmp_dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+tmp_dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+tmp_dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+tmp_dst[351:320] := SELECT4(a[383:256], imm8[5:4])
+tmp_dst[383:352] := SELECT4(a[383:256], imm8[7:6])
+tmp_dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+tmp_dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+tmp_dst[479:448] := SELECT4(a[511:384], imm8[5:4])
+tmp_dst[511:480] := SELECT4(a[511:384], imm8[7:6])
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp_dst[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="zmm {k}, zmm, imm8" xed="VPSHUFD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Shuffle 32-bit integers in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+dst[159:128] := SELECT4(a[255:128], imm8[1:0])
+dst[191:160] := SELECT4(a[255:128], imm8[3:2])
+dst[223:192] := SELECT4(a[255:128], imm8[5:4])
+dst[255:224] := SELECT4(a[255:128], imm8[7:6])
+dst[287:256] := SELECT4(a[383:256], imm8[1:0])
+dst[319:288] := SELECT4(a[383:256], imm8[3:2])
+dst[351:320] := SELECT4(a[383:256], imm8[5:4])
+dst[383:352] := SELECT4(a[383:256], imm8[7:6])
+dst[415:384] := SELECT4(a[511:384], imm8[1:0])
+dst[447:416] := SELECT4(a[511:384], imm8[3:2])
+dst[479:448] := SELECT4(a[511:384], imm8[5:4])
+dst[511:480] := SELECT4(a[511:384], imm8[7:6])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHUFD" form="zmm, zmm, imm8" xed="VPSHUFD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLD" form="zmm {k}, zmm, imm8" xed="VPSLLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_slli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLD" form="zmm, zmm, imm8" xed="VPSLLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="zmm {k}, zmm, zmm" xed="VPSLLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_sllv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSLLVD" form="zmm, zmm, zmm" xed="VPSLLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+		ELSE
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAD" form="zmm {k}, zmm, imm8" xed="VPSRAD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_srai_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="6"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAD" form="zmm, zmm, imm8" xed="VPSRAD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="zmm {k}, zmm, zmm" xed="VPSRAVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_srav_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRAVD" form="zmm, zmm, zmm" xed="VPSRAVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF imm8[7:0] &gt; 31
+			dst[i+31:i] := 0
+		ELSE
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLD" form="zmm {k}, zmm, imm8" xed="VPSRLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_srli_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLD" form="zmm, zmm, imm8" xed="VPSRLD_ZMMu32_MASKmskw_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		IF count[i+31:i] &lt; 32
+			dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+		ELSE
+			dst[i+31:i] := 0
+		FI
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="zmm {k}, zmm, zmm" xed="VPSRLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_srlv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF count[i+31:i] &lt; 32
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSRLVD" form="zmm, zmm, zmm" xed="VPSRLVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBD" form="zmm {k}, zmm, zmm" xed="VPSUBD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_sub_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBD" form="zmm, zmm, zmm" xed="VPSUBD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_test_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" (subject to writemask "k1") if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+	ELSE
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTMD" form="k {k}, zmm, zmm" xed="VPTESTMD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_test_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise AND of packed 32-bit integers in "a" and "b", producing intermediate 32-bit values, and set the corresponding bit in result mask "k" if the intermediate value is non-zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ((a[i+31:i] AND b[i+31:i]) != 0) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPTESTMD" form="k, zmm, zmm" xed="VPTESTMD_MASKmskw_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORD" form="zmm {k}, zmm, zmm" xed="VPXORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_xor_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Compute the bitwise XOR of packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORD" form="zmm, zmm, zmm" xed="VPXORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_xor_si512">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<parameter type="__m512i" varname="b" etype="M512"/>
+	<description>Compute the bitwise XOR of 512 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[511:0] := (a[511:0] XOR b[511:0])
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORD" form="zmm, zmm, zmm" xed="VPXORD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm {k}, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_xor_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPXORQ" form="zmm, zmm, zmm" xed="VPXORQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="vm32z, zmm" xed="VSCATTERDPS_MEMf32_MASKmskw_ZMMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Scatter single-precision (32-bit) floating-point elements from "a" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not stored when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="vm32z {k}, zmm" xed="VSCATTERDPS_MEMf32_MASKmskw_ZMMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPD" form="zmm {k}, zmm, zmm" xed="VSUBPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_sub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPD" form="zmm {k}, zmm, zmm {er}" xed="VSUBPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPD" form="zmm, zmm, zmm" xed="VSUBPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_sub_round_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPD" form="zmm, zmm, zmm {er}" xed="VSUBPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPS" form="zmm {k}, zmm, zmm" xed="VSUBPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_sub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI	
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPS" form="zmm {k}, zmm, zmm {er}" xed="VSUBPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPS" form="zmm, zmm, zmm" xed="VSUBPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_sub_round_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBPS" form="zmm, zmm, zmm {er}" xed="VSUBPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_castpd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Cast</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m512d to type __m512.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_castpd_si512">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Cast</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m512d to type __m512i.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_castps_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Cast</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m512 to type __m512d.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_castps_si512">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Cast</category>
+	<return type="__m512i" varname="dst" etype="M512"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m512 to type __m512i.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_castsi512_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Cast</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Cast vector of type __m512i to type __m512d.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_castsi512_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Cast</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Cast vector of type __m512i to type __m512.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := dst[31:0] + a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by addition using mask "k". Returns the sum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := dst[63:0] + a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 0.0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := dst[63:0] + a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 0.0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := dst[31:0] + a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 0xFFFFFFFF
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := dst[31:0] AND a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by bitwise AND using mask "k". Returns the bitwise AND of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 0xFFFFFFFFFFFFFFFF
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := dst[63:0] AND a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="int" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Reduce the packed signed 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := Int32(-0x80000000)
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := (dst[31:0] &gt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__int64" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Reduce the packed signed 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := Int64(-0x8000000000000000)
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := (dst[63:0] &gt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed unsigned 32-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := (dst[31:0] &gt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed unsigned 64-bit integers in "a" by maximum using mask "k". Returns the maximum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := (dst[63:0] &gt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := Cast_FP64(0xFFEFFFFFFFFFFFFF)
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := (dst[63:0] &gt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum using mask "k". Returns the maximum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := Cast_FP32(0xFF7FFFFF)
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := (dst[31:0] &gt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="int" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Reduce the packed signed 32-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := Int32(0x7FFFFFFF)
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := (dst[31:0] &lt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__int64" varname="dst" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Reduce the packed signed 64-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := Int64(0x7FFFFFFFFFFFFFFF)
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := (dst[63:0] &lt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed unsigned 32-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 0xFFFFFFFF
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := (dst[31:0] &lt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed unsigned 64-bit integers in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 0xFFFFFFFFFFFFFFFF
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := (dst[63:0] &lt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".</description>
+	<operation>
+dst[63:0] := Cast_FP64(0x7FEFFFFFFFFFFFFF)
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := (dst[63:0] &lt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by minimum using mask "k". Returns the minimum of all active elements in "a".</description>
+	<operation>
+dst[31:0] := Cast_FP32(0x7F7FFFFF)
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := (dst[31:0] &lt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 1
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := dst[31:0] * a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_mul_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by multiplication using mask "k". Returns the product of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 1
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := dst[63:0] * a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 1.0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := dst[63:0] * a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication using mask "k". Returns the product of all active elements in "a".</description>
+	<operation>
+dst[31:0] := FP32(1.0)
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := dst[31:0] * a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a".</description>
+	<operation>
+dst[31:0] := 0
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[31:0] := dst[31:0] OR a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_mask_reduce_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by bitwise OR using mask "k". Returns the bitwise OR of all active elements in "a".</description>
+	<operation>
+dst[63:0] := 0
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[63:0] := dst[63:0] OR a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_add_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by addition. Returns the sum of all elements in "a".</description>
+	<operation>
+dst[31:0] := 0
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := dst[31:0] + a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_add_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by addition. Returns the sum of all elements in "a".</description>
+	<operation>
+dst[63:0] := 0
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := dst[63:0] + a[i+63:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_add_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a".</description>
+	<operation>
+dst[63:0] := 0.0
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := dst[63:0] + a[i+63:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_add_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition. Returns the sum of all elements in "a".</description>
+	<operation>
+dst[31:0] := 0.0
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := dst[31:0] + a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a".</description>
+	<operation>
+dst[31:0] := 0xFFFFFFFF
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := dst[31:0] AND a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_and_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by bitwise AND. Returns the bitwise AND of all elements in "a".</description>
+	<operation>
+dst[63:0] := 0xFFFFFFFFFFFFFFFF
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := dst[63:0] AND a[i+63:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_max_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="int" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Reduce the packed signed 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a".</description>
+	<operation>
+dst[31:0] := Int32(-0x80000000)
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := (dst[31:0] &gt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_max_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__int64" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Reduce the packed signed 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a".</description>
+	<operation>
+dst[63:0] := Int64(-0x8000000000000000)
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := (dst[63:0] &gt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_max_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed unsigned 32-bit integers in "a" by maximum. Returns the maximum of all elements in "a".</description>
+	<operation>
+dst[31:0] := 0
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := (dst[31:0] &gt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_max_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed unsigned 64-bit integers in "a" by maximum. Returns the maximum of all elements in "a".</description>
+	<operation>
+dst[63:0] := 0
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := (dst[63:0] &gt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_max_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a".</description>
+	<operation>
+dst[63:0] := Cast_FP64(0xFFEFFFFFFFFFFFFF)
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := (dst[63:0] &gt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_max_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by maximum. Returns the maximum of all elements in "a".</description>
+	<operation>
+dst[31:0] := Cast_FP32(0xFF7FFFFF)
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := (dst[31:0] &gt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_min_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="int" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<description>Reduce the packed signed 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a".</description>
+	<operation>
+dst[31:0] := Int32(0x7FFFFFFF)
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := (dst[31:0] &lt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_min_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__int64" varname="dst" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="SI64"/>
+	<description>Reduce the packed signed 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a".</description>
+	<operation>
+dst[63:0] := Int64(0x7FFFFFFFFFFFFFFF)
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := (dst[63:0] &lt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_min_epu32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed unsigned 32-bit integers in "a" by minimum. Returns the minimum of all elements in "a".</description>
+	<operation>
+dst[31:0] := 0xFFFFFFFF
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := (dst[31:0] &lt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_min_epu64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed unsigned 64-bit integers in "a" by minimum. Returns the minimum of all elements in "a".</description>
+	<operation>
+dst[63:0] := 0xFFFFFFFFFFFFFFFF
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := (dst[63:0] &lt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_min_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a".</description>
+	<operation>
+dst[63:0] := Cast_FP64(0x7FEFFFFFFFFFFFFF)
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := (dst[63:0] &lt; a[i+63:i] ? dst[63:0] : a[i+63:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_min_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by minimum. Returns the minimum of all elements in "a".</description>
+	<operation>
+dst[31:0] := Cast_FP32(0x7F7FFFFF)
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := (dst[31:0] &lt; a[i+31:i] ? dst[31:0] : a[i+31:i])
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_mul_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by multiplication. Returns the product of all elements in "a".</description>
+	<operation>
+dst[31:0] := 1
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := dst[31:0] * a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_mul_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by multiplication. Returns the product of all elements in "a".</description>
+	<operation>
+dst[63:0] := 1
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := dst[63:0] * a[i+63:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Reduce the packed double-precision (64-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a".</description>
+	<operation>
+dst[63:0] := 1.0
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := dst[63:0] * a[i+63:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Reduce the packed single-precision (32-bit) floating-point elements in "a" by multiplication. Returns the product of all elements in "a".</description>
+	<operation>
+dst[31:0] := FP32(1.0)
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := dst[31:0] * a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_or_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Reduce the packed 32-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a".</description>
+	<operation>
+dst[31:0] := 0
+FOR j := 0 to 15
+	i := j*32
+	dst[31:0] := dst[31:0] OR a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" sequence="TRUE" name="_mm512_reduce_or_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Reduce the packed 64-bit integers in "a" by bitwise OR. Returns the bitwise OR of all elements in "a".</description>
+	<operation>
+dst[63:0] := 0
+FOR j := 0 to 7
+	i := j*64
+	dst[63:0] := dst[63:0] OR a[i+63:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_and_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Logical</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<description>Performs element-by-element bitwise AND between packed 32-bit integer elements of "v2" and "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v2[i+31:i] &amp; v3[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDD" form="zmm {k}, zmm, zmm" xed="VPANDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cvtpslo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<description>Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := j*64
+	dst[n+63:n] := Convert_FP32_To_FP64(v2[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cvtpslo_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<description>Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[l+63:l] := Convert_FP32_To_FP64(v2[i+31:i])
+	ELSE
+		dst[l+63:l] := src[l+63:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPS2PD" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cvtepi32lo_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="v2" etype="SI32"/>
+	<description>Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cvtepi32lo_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="SI32"/>
+	<description>Performs element-by-element conversion of the lower half of packed 32-bit integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := j*64
+	IF k[j]
+		dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i])
+	ELSE
+		dst[n+63:n] := src[n+63:n]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTDQ2PD" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cvtepu32lo_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<description>Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	n := j*64
+	dst[n+63:n] := Convert_Int32_To_FP64(v2[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cvtepu32lo_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<description>Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in "v2" to packed double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	l := j*64
+	IF k[j]
+		dst[l+63:l] := Convert_Int32_To_FP64(v2[i+31:i])
+	ELSE
+		dst[l+63:l] := src[l+63:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTUDQ2PD" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32extgather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const *" varname="base_addr" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 16 memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 32-bit integer elements and stores them in "dst". AVX512 only supports _MM_UPCONV_EPI32_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:   dst[i+31:i] := MEM[addr+31:addr]
+	_MM_UPCONV_EPI32_UINT8:  dst[i+31:i] := ZeroExtend32(MEM[addr+7:addr])
+	_MM_UPCONV_EPI32_SINT8:  dst[i+31:i] := SignExtend32(MEM[addr+7:addr])
+	_MM_UPCONV_EPI32_UINT16: dst[i+31:i] := ZeroExtend32(MEM[addr+15:addr])
+	_MM_UPCONV_EPI32_SINT16: dst[i+31:i] := SignExtend32(MEM[addr+15:addr])
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="zmm, m512" xed="VPGATHERDD_ZMMu32_MASKmskw_MEMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32extgather_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const *" varname="base_addr" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 16 memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 32-bit integer elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). AVX512 only supports _MM_UPCONV_EPI32_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_EPI32_NONE:   dst[i+31:i] := MEM[addr+31:addr]
+		_MM_UPCONV_EPI32_UINT8:  dst[i+31:i] := ZeroExtend32(MEM[addr+7:addr])
+		_MM_UPCONV_EPI32_SINT8:  dst[i+31:i] := SignExtend32(MEM[addr+7:addr])
+		_MM_UPCONV_EPI32_UINT16: dst[i+31:i] := ZeroExtend32(MEM[addr+15:addr])
+		_MM_UPCONV_EPI32_SINT16: dst[i+31:i] := SignExtend32(MEM[addr+15:addr])
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDD" form="zmm {k}, m512" xed="VPGATHERDD_ZMMu32_MASKmskw_MEMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32loextgather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" using "conv" to 64-bit integer elements and stores them in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE: dst[i+63:i] := MEM[addr+63:addr]
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="zmm, m512" xed="VPGATHERDQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32loextgather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" using "conv" to 64-bit integer elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_EPI64_NONE: dst[i+63:i] := MEM[addr+63:addr]
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="zmm {k}, m512" xed="VPGATHERDQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32extgather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 16 memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv" to single-precision (32-bit) floating-point elements and stores them in "dst". AVX512 only supports _MM_UPCONV_PS_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:    dst[i+31:i] := MEM[addr+31:addr]
+	_MM_UPCONV_PS_FLOAT16: dst[i+31:i] := Convert_FP16_To_FP32(MEM[addr+15:addr])
+	_MM_UPCONV_PS_UINT8:   dst[i+31:i] := Convert_UInt8_To_FP32(MEM[addr+7:addr])
+	_MM_UPCONV_PS_SINT8:   dst[i+31:i] := Convert_Int8_To_FP32(MEM[addr+7:addr])
+	_MM_UPCONV_PS_UINT16:  dst[i+31:i] := Convert_UInt16_To_FP32(MEM[addr+15:addr])
+	_MM_UPCONV_PS_SINT16:  dst[i+31:i] := Convert_Int16_To_FP32(MEM[addr+15:addr])
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="zmm, m512" xed="VGATHERDPS_ZMMf32_MASKmskw_MEMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32extgather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 16 memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv" to single-precision (32-bit) floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). AVX512 only supports _MM_UPCONV_PS_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_PS_NONE:    dst[i+31:i] := MEM[addr+31:addr]
+		_MM_UPCONV_PS_FLOAT16: dst[i+31:i] := Convert_FP16_To_FP32(MEM[addr+15:addr])
+		_MM_UPCONV_PS_UINT8:   dst[i+31:i] := Convert_UInt8_To_FP32(MEM[addr+7:addr])
+		_MM_UPCONV_PS_SINT8:   dst[i+31:i] := Convert_Int8_To_FP32(MEM[addr+7:addr])
+		_MM_UPCONV_PS_UINT16:  dst[i+31:i] := Convert_UInt16_To_FP32(MEM[addr+15:addr])
+		_MM_UPCONV_PS_SINT16:  dst[i+31:i] := Convert_Int16_To_FP32(MEM[addr+15:addr])
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPS" form="zmm {k}, m512" xed="VGATHERDPS_ZMMf32_MASKmskw_MEMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32loextgather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 double-precision (64-bit) floating-point elements in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" using "conv" to 64-bit floating-point elements and stores them in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_PD_NONE: dst[i+63:i] := MEM[addr+63:addr]
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="zmm, m512" xed="VGATHERDPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32loextgather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 double-precision (64-bit) floating-point elements in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" using "conv" to 64-bit floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_PD_NONE:
+			dst[i+63:i] := MEM[addr+63:addr]
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="zmm {k}, m512" xed="VGATHERDPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32extscatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 16 packed single-precision (32-bit) floating-point elements in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv". AVX512 only supports _MM_DOWNCONV_PS_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_PS_NONE:    MEM[addr+31:addr] := a[i+31:i]
+	_MM_DOWNCONV_PS_FLOAT16: MEM[addr+15:addr] := Convert_FP32_To_FP16(a[i+31:i])
+	_MM_DOWNCONV_PS_UINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_UInt8(a[i+31:i])
+	_MM_DOWNCONV_PS_SINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_Int8(a[i+31:i])
+	_MM_DOWNCONV_PS_UINT16:  MEM[addr+15:addr] := Convert_FP32_To_UInt16(a[i+31:i])
+	_MM_DOWNCONV_PS_SINT16:  MEM[addr+15:addr] := Convert_FP32_To_Int16(a[i+31:i])
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="m512, zmm" xed="VSCATTERDPS_MEMf32_MASKmskw_ZMMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32extscatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 16 packed single-precision (32-bit) floating-point elements in "a" according to "conv" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements are not written when the corresponding mask bit is not set). AVX512 only supports _MM_DOWNCONV_PS_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_PS_NONE:    MEM[addr+31:addr] := a[i+31:i]
+		_MM_DOWNCONV_PS_FLOAT16: MEM[addr+15:addr] := Convert_FP32_To_FP16(a[i+31:i])
+		_MM_DOWNCONV_PS_UINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_UInt8(a[i+31:i])
+		_MM_DOWNCONV_PS_SINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_Int8(a[i+31:i])
+		_MM_DOWNCONV_PS_UINT16:  MEM[addr+15:addr] := Convert_FP32_To_UInt16(a[i+31:i])
+		_MM_DOWNCONV_PS_SINT16:  MEM[addr+15:addr] := Convert_FP32_To_Int16(a[i+31:i])
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPS" form="m512 {k}, zmm" xed="VSCATTERDPS_MEMf32_MASKmskw_ZMMf32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32loextscatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed double-precision (64-bit) floating-point elements in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_PD_NONE: MEM[addr+63:addr] := a[i+63:i]
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="m512, zmm" xed="VSCATTERDPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32loextscatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed double-precision (64-bit) floating-point elements in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_PD_NONE: MEM[addr+63:addr] := a[i+63:i]
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="m512 {k}, zmm" xed="VSCATTERDPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32loextscatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed 64-bit integer elements in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_EPI64_NONE: MEM[addr+63:addr] := a[i+63:i]
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="m512, zmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32loextscatter_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed 64-bit integer elements in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using "conv". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_EPI64_NONE: MEM[addr+63:addr] := a[i+63:i]
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="m512 {k}, zmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_cvtpd_pslo">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<description>Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst". The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k := j*32
+	dst[k+31:k] := Convert_FP64_To_FP32(v2[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_cvtpd_pslo">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<description>Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to single-precision (32-bit) floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_FP64_To_FP32(v2[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32logather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="zmm, m512" xed="VPGATHERDQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32logather_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 64-bit integer elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPGATHERDQ" form="zmm {k}, m512" xed="VPGATHERDQ_ZMMu64_MASKmskw_MEMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32logather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 double-precision (64-bit) floating-point elements stored at memory locations starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" and stores them in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	dst[i+63:i] := MEM[addr+63:addr]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="zmm, m512" xed="VGATHERDPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32logather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 double-precision (64-bit) floating-point elements from memory starting at location "base_addr" at packed 32-bit integer indices stored in the lower half of "vindex" scaled by "scale" into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		dst[i+63:i] := MEM[addr+63:addr]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGATHERDPD" form="zmm {k}, m512" xed="VGATHERDPD_ZMMf64_MASKmskw_MEMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32loscatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="m512, zmm" xed="VSCATTERDPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32loscatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed double-precision (64-bit) floating-point elements in "a" to memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERDPD" form="m512 {k}, zmm" xed="VSCATTERDPD_MEMf64_MASKmskw_ZMMf64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_abs_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<description>Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ABS(v2[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDD" form="zmm, zmm, m512" xed="VPANDD_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_abs_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<description>Finds the absolute value of each packed single-precision (32-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ABS(v2[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDD" form="zmm {k}, zmm, m512" xed="VPANDD_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_abs_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<description>Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ABS(v2[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDQ" form="zmm, zmm, m512" xed="VPANDQ_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_abs_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<description>Finds the absolute value of each packed double-precision (64-bit) floating-point element in "v2", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ABS(v2[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPANDQ" form="zmm {k}, zmm, m512" xed="VPANDQ_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML/KNC" name="_mm512_log2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOG2PS" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML/KNC" name="_mm512_mask_log2_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOG2PS" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_i32extscatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 16 packed 32-bit integer elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". "hint" indicates to the processor whether the data is non-temporal. AVX512 only supports _MM_DOWNCONV_EPI32_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_EPI32_NONE:   MEM[addr+31:addr] := a[i+31:i]
+	_MM_DOWNCONV_EPI32_UINT8:  MEM[addr+ 7:addr] := Truncate8(a[i+31:i])
+	_MM_DOWNCONV_EPI32_SINT8:  MEM[addr+ 7:addr] := Saturate8(a[i+31:i])
+	_MM_DOWNCONV_EPI32_UINT16: MEM[addr+15:addr] := Truncate16(a[i+31:i])
+	_MM_DOWNCONV_EPI32_SINT16: MEM[addr+15:addr] := Saturate16(a[i+15:i])
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="m512, zmm" xed="VPSCATTERDD_MEMu32_MASKmskw_ZMMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_i32extscatter_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512F/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 16 packed 32-bit integer elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". Elements are written using writemask "k" (elements are only written when the corresponding mask bit is set; otherwise, elements are left unchanged in memory). "hint" indicates to the processor whether the data is non-temporal. AVX512 only supports _MM_DOWNCONV_EPI32_NONE.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_EPI32_NONE:   MEM[addr+31:addr] := a[i+31:i]
+		_MM_DOWNCONV_EPI32_UINT8:  MEM[addr+ 7:addr] := Truncate8(a[i+31:i])
+		_MM_DOWNCONV_EPI32_SINT8:  MEM[addr+ 7:addr] := Saturate8(a[i+31:i])
+		_MM_DOWNCONV_EPI32_UINT16: MEM[addr+15:addr] := Truncate16(a[i+31:i])
+		_MM_DOWNCONV_EPI32_SINT16: MEM[addr+15:addr] := Saturate16(a[i+15:i])
+		ESAC
+	FI 
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDD" form="m512 {k}, zmm" xed="VPSCATTERDD_MEMu32_MASKmskw_ZMMu32_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+	dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="zmm, zmm, zmm" xed="VPMADD52LUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="zmm {k}, zmm, zmm" xed="VPMADD52LUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="zmm {z}, zmm, zmm" xed="VPMADD52LUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+	dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="ymm, ymm, ymm" xed="VPMADD52LUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="ymm {k}, ymm, ymm" xed="VPMADD52LUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="ymm {z}, ymm, ymm" xed="VPMADD52LUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+	dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="xmm, xmm, xmm" xed="VPMADD52LUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="xmm {k}, xmm, xmm" xed="VPMADD52LUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_madd52lo_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADD52LUQ" form="xmm {z}, xmm, xmm" xed="VPMADD52LUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+	dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="zmm, zmm, zmm" xed="VPMADD52HUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="zmm {k}, zmm, zmm" xed="VPMADD52HUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="zmm {z}, zmm, zmm" xed="VPMADD52HUQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+	dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="ymm, ymm, ymm" xed="VPMADD52HUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="ymm {k}, ymm, ymm" xed="VPMADD52HUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="ymm {z}, ymm, ymm" xed="VPMADD52HUQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+	dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="xmm, xmm, xmm" xed="VPMADD52HUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="xmm {k}, xmm, xmm" xed="VPMADD52HUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_madd52hi_epu64">
+	<CPUID>AVX512IFMA52</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Multiply packed unsigned 52-bit integers in each 64-bit element of "b" and "c" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
+		dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMADD52HUQ" form="xmm {z}, xmm, xmm" xed="VPMADD52HUQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_prefetch_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged in cache. "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j:= 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+31:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0QPS" form="vm64z" xed="VGATHERPF0QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1QPS" form="vm64z" xed="VGATHERPF1QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_prefetch_i64gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged in cache using writemask "k" (elements are only brought into cache when their corresponding mask bit is set). "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j:= 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+31:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0QPS" form="vm64z {k}" xed="VGATHERPF0QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1QPS" form="vm64z {k}" xed="VGATHERPF1QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_prefetch_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch single-precision (32-bit) floating-point elements with intent to write into memory using 64-bit indices. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+31:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0QPS" form="vm64z" xed="VSCATTERPF0QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1QPS" form="vm64z" xed="VSCATTERPF1QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_prefetch_i64scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch single-precision (32-bit) floating-point elements with intent to write into memory using 64-bit indices. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 32-bit elements are stored at addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not brought into cache when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+31:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0QPS" form="vm64z {k}" xed="VSCATTERPF0QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1QPS" form="vm64z {k}" xed="VSCATTERPF1QPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_prefetch_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged in cache. "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+63:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0DPD" form="vm32y" xed="VGATHERPF0DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1DPD" form="vm32y" xed="VGATHERPF1DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_prefetch_i32gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged in cache using writemask "k" (elements are brought into cache only when their corresponding mask bits are set). "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+63:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0DPD" form="vm32y {k}" xed="VGATHERPF0DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1DPD" form="vm32y {k}" xed="VGATHERPF1DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_prefetch_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements with intent to write using 32-bit indices. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+63:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0DPD" form="vm32y" xed="VSCATTERPF0DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1DPD" form="vm32y" xed="VSCATTERPF1DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_prefetch_i32scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="vindex" etype="SI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements with intent to write using 32-bit indices. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not brought into cache when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+63:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0DPD" form="vm32y {k}" xed="VSCATTERPF0DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1DPD" form="vm32y {k}" xed="VSCATTERPF1DPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_prefetch_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements from memory into cache level specified by "hint" using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+63:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0QPD" form="vm32z" xed="VGATHERPF0QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1QPD" form="vm32z" xed="VGATHERPF1QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_prefetch_i64gather_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="base_addr" etype="FP64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements from memory into cache level specified by "hint" using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Prefetched elements are merged in cache using writemask "k" (elements are copied from memory when the corresponding mask bit is set). "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+63:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0QPD" form="vm32z {k}" xed="VGATHERPF0QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1QPD" form="vm32z {k}" xed="VGATHERPF1QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_prefetch_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements with intent to write into memory using 64-bit indices. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+63:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0QPD" form="vm32z" xed="VSCATTERPF0QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1QPD" form="vm32z" xed="VSCATTERPF1QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_prefetch_i64scatter_pd">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch double-precision (64-bit) floating-point elements with intent to write into memory using 64-bit indices. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale") subject to mask "k" (elements are not brought into cache when the corresponding mask bit is not set). "scale" should be 1, 2, 4 or 8.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+63:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0QPD" form="vm32z {k}" xed="VSCATTERPF0QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1QPD" form="vm32z {k}" xed="VSCATTERPF1QPD_MEMf64_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_prefetch_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetch single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged in cache using writemask "k" (elements are brought into cache only when their corresponding mask bits are set). "scale" should be 1, 2, 4 or 8. The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+31:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0DPS" form="vm32y {k}" xed="VGATHERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1DPS" form="vm32y {k}" xed="VGATHERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_prefetch_i32extgather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches a set of 16 single-precision (32-bit) memory locations pointed to by base address "base_addr" and 32-bit integer index vector "vindex" with scale "scale" to L1 or L2 level of cache depending on the value of "hint". The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.
+The "conv" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the "conv" parameter specified for the subsequent gather intrinsic.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+31:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0DPS" form="m512" xed="VGATHERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1DPS" form="m512" xed="VGATHERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_prefetch_i32extgather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches a set of 16 single-precision (32-bit) memory locations pointed to by base address "base_addr" and 32-bit integer index vector "vindex" with scale "scale" to L1 or L2 level of cache depending on the value of "hint". Gathered elements are merged in cache using writemask "k" (elements are brought into cache only when their corresponding mask bits are set). The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.
+The "conv" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the "conv" parameter specified for the subsequent gather intrinsic.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+31:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0DPS" form="m512 {k}" xed="VGATHERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1DPS" form="m512 {k}" xed="VGATHERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_prefetch_i32extscatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches a set of 16 single-precision (32-bit) memory locations pointed to by base address "base_addr" and 32-bit integer index vector "vindex" with scale "scale" to L1 or L2 level of cache depending on the value of "hint", with a request for exclusive ownership. The "hint" parameter may be one of the following: _MM_HINT_T0 = 1 for prefetching to L1 cache, _MM_HINT_T1 = 2 for prefetching to L2 cache, _MM_HINT_T2 = 3 for prefetching to L2 cache non-temporal, _MM_HINT_NTA = 0 for prefetching to L1 cache non-temporal. The "conv" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the "conv" parameter specified for the subsequent scatter intrinsic.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+31:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0DPS" form="m512" xed="VSCATTERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1DPS" form="m512" xed="VSCATTERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_prefetch_i32extscatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches a set of 16 single-precision (32-bit) memory locations pointed to by base address "base_addr" and 32-bit integer index vector "vindex" with scale "scale" to L1 or L2 level of cache depending on the value of "hint". The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.
+The "conv" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the "conv" parameter specified for the subsequent scatter intrinsic. Only those elements whose corresponding mask bit in "k" is set are loaded into cache.</description>
+	<operation>
+cachev := 0
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+31:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0DPS" form="m512 {k}" xed="VSCATTERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1DPS" form="m512 {k}" xed="VSCATTERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_prefetch_i32gather_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Load</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="void const*" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches 16 single-precision (32-bit) floating-point elements in memory starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+31:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VGATHERPF0DPS" form="m512" xed="VGATHERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VGATHERPF1DPS" form="m512" xed="VGATHERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_prefetch_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches 16 single-precision (32-bit) floating-point elements in memory starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	Prefetch(MEM[addr+31:addr], hint)
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0DPS" form="m512" xed="VSCATTERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1DPS" form="m512" xed="VSCATTERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512/KNC" name="_mm512_mask_prefetch_i32scatter_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512PF/KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="IMM" hint="TRUE" immtype="_MM_HINT_PREFETCH"/>
+	<description>Prefetches 16 single-precision (32-bit) floating-point elements in memory starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale". The "hint" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. Only those elements whose corresponding mask bit in "k" is set are loaded into the desired cache.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		Prefetch(MEM[addr+31:addr], hint)
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VSCATTERPF0DPS" form="m512 {k}" xed="VSCATTERPF0DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<instruction name="VSCATTERPF1DPS" form="m512 {k}" xed="VSCATTERPF1DPS_MEMf32_MASKmskw_AVX512PF_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POPCNT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="ymm {z}, ymm" xed="VPOPCNTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POPCNT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="ymm {k}, ymm" xed="VPOPCNTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := POPCNT(a[i+63:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="ymm, ymm" xed="VPOPCNTQ_YMMu64_MASKmskw_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POPCNT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="xmm {z}, xmm" xed="VPOPCNTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POPCNT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="xmm {k}, xmm" xed="VPOPCNTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := POPCNT(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="xmm, xmm" xed="VPOPCNTQ_XMMu64_MASKmskw_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := POPCNT(a[i+31:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="ymm, ymm" xed="VPOPCNTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_mask_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POPCNT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="ymm {k}, ymm" xed="VPOPCNTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_maskz_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POPCNT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="ymm {z}, ymm" xed="VPOPCNTD_YMMu32_MASKmskw_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := POPCNT(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="xmm, xmm" xed="VPOPCNTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_mask_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POPCNT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="xmm {k}, xmm" xed="VPOPCNTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_maskz_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POPCNT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="xmm {z}, xmm" xed="VPOPCNTD_XMMu32_MASKmskw_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := POPCNT(a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="zmm, zmm" xed="VPOPCNTD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POPCNT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="zmm {k}, zmm" xed="VPOPCNTD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_popcnt_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<description>Count the number of logical 1 bits in packed 32-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := POPCNT(a[i+31:i])
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTD" form="zmm {z}, zmm" xed="VPOPCNTD_ZMMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := POPCNT(a[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="zmm, zmm" xed="VPOPCNTQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POPCNT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="zmm {k}, zmm" xed="VPOPCNTQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_popcnt_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512VPOPCNTDQ</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<description>Count the number of logical 1 bits in packed 64-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := POPCNT(a[i+63:i])
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTQ" form="zmm {z}, zmm" xed="VPOPCNTQ_ZMMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_4fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__m512" varname="a0" etype="FP32"/>
+	<parameter type="__m512" varname="a1" etype="FP32"/>
+	<parameter type="__m512" varname="a2" etype="FP32"/>
+	<parameter type="__m512" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by the 4 corresponding packed elements in "b", accumulate with the corresponding elements in "src", and store the results in "dst".</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		addr := b + m * 32
+		dst.fp32[i] := dst.fp32[i] + a{m}.fp32[i] * Cast_FP32(MEM[addr+31:addr])
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="V4FMADDPS" form="zmm, zmm, m128" xed="V4FMADDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_4fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a0" etype="FP32"/>
+	<parameter type="__m512" varname="a1" etype="FP32"/>
+	<parameter type="__m512" varname="a2" etype="FP32"/>
+	<parameter type="__m512" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by the 4 corresponding packed elements in "b", accumulate with the corresponding elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		addr := b + m * 32
+		IF k[i]
+			dst.fp32[i] := dst.fp32[i] + a{m}.fp32[i] * Cast_FP32(MEM[addr+31:addr])
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="V4FMADDPS" form="zmm {k}, zmm, m128" xed="V4FMADDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_4fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__m512" varname="a0" etype="FP32"/>
+	<parameter type="__m512" varname="a1" etype="FP32"/>
+	<parameter type="__m512" varname="a2" etype="FP32"/>
+	<parameter type="__m512" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by the 4 corresponding packed elements in "b", accumulate with the corresponding elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		addr := b + m * 32
+		IF k[i]
+			dst.fp32[i] := dst.fp32[i] + a{m}.fp32[i] * Cast_FP32(MEM[addr+31:addr])
+		ELSE
+			dst.fp32[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="V4FMADDPS" form="zmm {z}, zmm, m128" xed="V4FMADDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_4fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__m512" varname="a0" etype="FP32"/>
+	<parameter type="__m512" varname="a1" etype="FP32"/>
+	<parameter type="__m512" varname="a2" etype="FP32"/>
+	<parameter type="__m512" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by the 4 corresponding packed elements in "b", accumulate the negated intermediate result with the corresponding elements in "src", and store the results in "dst".</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		addr := b + m * 32
+		dst.fp32[i] := dst.fp32[i] - a{m}.fp32[i] * Cast_FP32(MEM[addr+31:addr])
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="V4FNMADDPS" form="zmm, zmm, m128" xed="V4FNMADDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_4fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a0" etype="FP32"/>
+	<parameter type="__m512" varname="a1" etype="FP32"/>
+	<parameter type="__m512" varname="a2" etype="FP32"/>
+	<parameter type="__m512" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by the 4 corresponding packed elements in "b", accumulate the negated intermediate result with the corresponding elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		addr := b + m * 32
+		IF k[i]
+			dst.fp32[i] := dst.fp32[i] - a{m}.fp32[i] * Cast_FP32(MEM[addr+31:addr])
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="V4FNMADDPS" form="zmm {k}, zmm, m128" xed="V4FNMADDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_4fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__m512" varname="a0" etype="FP32"/>
+	<parameter type="__m512" varname="a1" etype="FP32"/>
+	<parameter type="__m512" varname="a2" etype="FP32"/>
+	<parameter type="__m512" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by the 4 corresponding packed elements in "b", accumulate the negated intermediate result with the corresponding elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		addr := b + m * 32
+		IF k[i]
+			dst.fp32[i] := dst.fp32[i] - a{m}.fp32[i] * Cast_FP32(MEM[addr+31:addr])
+		ELSE
+			dst.fp32[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="V4FNMADDPS" form="zmm {z}, zmm, m128" xed="V4FNMADDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_4fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__m128" varname="a0" etype="FP32"/>
+	<parameter type="__m128" varname="a1" etype="FP32"/>
+	<parameter type="__m128" varname="a2" etype="FP32"/>
+	<parameter type="__m128" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by corresponding element in "b", accumulate with the lower element in "src", and store the result in the lower element of "dst".</description>
+	<operation>
+dst[127:0] := src[127:0]
+FOR m := 0 to 3
+	addr := b + m * 32
+	dst.fp32[0] := dst.fp32[0] + a{m}.fp32[0] * Cast_FP32(MEM[addr+31:addr])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="V4FMADDSS" form="xmm, xmm, m128" xed="V4FMADDSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_mask_4fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a0" etype="FP32"/>
+	<parameter type="__m128" varname="a1" etype="FP32"/>
+	<parameter type="__m128" varname="a2" etype="FP32"/>
+	<parameter type="__m128" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by corresponding element in "b", accumulate with the lower element in "src", and store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set).</description>
+	<operation>
+dst[127:0] := src[127:0]
+IF k[0]
+	FOR m := 0 to 3
+		addr := b + m * 32
+		dst.fp32[0] := dst.fp32[0] + a{m}.fp32[0] * Cast_FP32(MEM[addr+31:addr])
+	ENDFOR
+FI
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="V4FMADDSS" form="xmm {k}, xmm, m128" xed="V4FMADDSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_maskz_4fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__m128" varname="a0" etype="FP32"/>
+	<parameter type="__m128" varname="a1" etype="FP32"/>
+	<parameter type="__m128" varname="a2" etype="FP32"/>
+	<parameter type="__m128" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by corresponding element in "b", accumulate with the lower element in "src", and store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set).</description>
+	<operation>
+dst[127:0] := src[127:0]
+IF k[0]
+	FOR m := 0 to 3
+		addr := b + m * 32
+		dst.fp32[0] := dst.fp32[0] + a{m}.fp32[0] * Cast_FP32(MEM[addr+31:addr])
+	ENDFOR
+ELSE
+	dst.fp32[0] := 0
+FI
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="V4FMADDSS" form="xmm {z}, xmm, m128" xed="V4FMADDSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_4fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__m128" varname="a0" etype="FP32"/>
+	<parameter type="__m128" varname="a1" etype="FP32"/>
+	<parameter type="__m128" varname="a2" etype="FP32"/>
+	<parameter type="__m128" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by corresponding element in "b", accumulate the negated intermediate result with the lower element in "src", and store the result in the lower element of "dst".</description>
+	<operation>
+dst[127:0] := src[127:0]
+FOR m := 0 to 3
+	addr := b + m * 32
+	dst.fp32[0] := dst.fp32[0] - a{m}.fp32[0] * Cast_FP32(MEM[addr+31:addr])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="V4FNMADDSS" form="xmm, xmm, m128" xed="V4FNMADDSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_mask_4fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a0" etype="FP32"/>
+	<parameter type="__m128" varname="a1" etype="FP32"/>
+	<parameter type="__m128" varname="a2" etype="FP32"/>
+	<parameter type="__m128" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by corresponding element in "b", accumulate the negated intermediate result with the lower element in "src", and store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set).</description>
+	<operation>
+dst[127:0] := src[127:0]
+IF k[0]
+	FOR m := 0 to 3
+		addr := b + m * 32
+		dst.fp32[0] := dst.fp32[0] - a{m}.fp32[0] * Cast_FP32(MEM[addr+31:addr])
+	ENDFOR
+FI
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="V4FNMADDSS" form="xmm {k}, xmm, m128" xed="V4FNMADDSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_maskz_4fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>AVX512_4FMAPS</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__m128" varname="a0" etype="FP32"/>
+	<parameter type="__m128" varname="a1" etype="FP32"/>
+	<parameter type="__m128" varname="a2" etype="FP32"/>
+	<parameter type="__m128" varname="a3" etype="FP32"/>
+	<parameter type="__m128 *" varname="b" etype="FP32" memwidth="128"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements specified in 4 consecutive operands "a0" through "a3" by corresponding element in "b", accumulate the negated intermediate result with the lower element in "src", and store the result in the lower element of "dst" using zeromask "k" (the element is zeroed out when mask bit 0 is not set).</description>
+	<operation>
+dst[127:0] := src[127:0]
+IF k[0]
+	FOR m := 0 to 3
+		addr := b + m * 32
+		dst.fp32[0] := dst.fp32[0] - a{m}.fp32[0] * Cast_FP32(MEM[addr+31:addr])
+	ENDFOR
+ELSE
+	dst.fp32[0] := 0
+FI
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="V4FNMADDSS" form="xmm {z}, xmm, m128" xed="V4FNMADDSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_4dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_4VNNIW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a0" etype="SI16"/>
+	<parameter type="__m512i" varname="a1" etype="SI16"/>
+	<parameter type="__m512i" varname="a2" etype="SI16"/>
+	<parameter type="__m512i" varname="a3" etype="SI16"/>
+	<parameter type="__m128i *" varname="b" etype="SI16" memwidth="128"/>
+	<description>Compute 4 sequential operand source-block dot-products of two signed 16-bit element operands with 32-bit element accumulation, and store the results in "dst".</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		lim_base := b + m*32
+		t.dword  := MEM[lim_base+31:lim_base]
+		p1.dword := SignExtend32(a{m}.word[2*i+0]) * SignExtend32(Cast_Int16(t.word[0]))
+		p2.dword := SignExtend32(a{m}.word[2*i+1]) * SignExtend32(Cast_Int16(t.word[1]))
+		dst.dword[i] := dst.dword[i] + p1.dword + p2.dword
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VP4DPWSSD" form="zmm, zmm, m128" xed="VP4DPWSSD_ZMMi32_MASKmskw_ZMMi16_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_4dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_4VNNIW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a0" etype="SI16"/>
+	<parameter type="__m512i" varname="a1" etype="SI16"/>
+	<parameter type="__m512i" varname="a2" etype="SI16"/>
+	<parameter type="__m512i" varname="a3" etype="SI16"/>
+	<parameter type="__m128i *" varname="b" etype="SI16" memwidth="128"/>
+	<description>Compute 4 sequential operand source-block dot-products of two signed 16-bit element operands with 32-bit element accumulation with mask, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	IF k[i]
+		FOR m := 0 to 3
+			lim_base := b + m*32
+			t.dword  := MEM[lim_base+31:lim_base]
+			p1.dword := SignExtend32(a{m}.word[2*i+0]) * SignExtend32(Cast_Int16(t.word[0]))
+			p2.dword := SignExtend32(a{m}.word[2*i+1]) * SignExtend32(Cast_Int16(t.word[1]))
+			dst.dword[i] := dst.dword[i] + p1.dword + p2.dword
+		ENDFOR
+	ELSE
+		dst.dword[i] := src.dword[i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VP4DPWSSD" form="zmm {k}, zmm, m128" xed="VP4DPWSSD_ZMMi32_MASKmskw_ZMMi16_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_4dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_4VNNIW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a0" etype="SI16"/>
+	<parameter type="__m512i" varname="a1" etype="SI16"/>
+	<parameter type="__m512i" varname="a2" etype="SI16"/>
+	<parameter type="__m512i" varname="a3" etype="SI16"/>
+	<parameter type="__m128i *" varname="b" etype="SI16" memwidth="128"/>
+	<description>Compute 4 sequential operand source-block dot-products of two signed 16-bit element operands with 32-bit element accumulation with mask, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	IF k[i]
+		FOR m := 0 to 3
+			lim_base := b + m*32
+			t.dword  := MEM[lim_base+31:lim_base]
+			p1.dword := SignExtend32(a{m}.word[2*i+0]) * SignExtend32(Cast_Int16(t.word[0]))
+			p2.dword := SignExtend32(a{m}.word[2*i+1]) * SignExtend32(Cast_Int16(t.word[1]))
+			dst.dword[i] := dst.dword[i] + p1.dword + p2.dword
+		ENDFOR
+	ELSE
+		dst.dword[i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VP4DPWSSD" form="zmm {z}, zmm, m128" xed="VP4DPWSSD_ZMMi32_MASKmskw_ZMMi16_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_4dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_4VNNIW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a0" etype="SI16"/>
+	<parameter type="__m512i" varname="a1" etype="SI16"/>
+	<parameter type="__m512i" varname="a2" etype="SI16"/>
+	<parameter type="__m512i" varname="a3" etype="SI16"/>
+	<parameter type="__m128i *" varname="b" etype="SI16" memwidth="128"/>
+	<description>Compute 4 sequential operand source-block dot-products of two signed 16-bit element operands with 32-bit element accumulation and signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	FOR m := 0 to 3
+		lim_base := b + m*32
+		t.dword  := MEM[lim_base+31:lim_base]
+		p1.dword := SignExtend32(a{m}.word[2*i+0]) * SignExtend32(Cast_Int16(t.word[0]))
+		p2.dword := SignExtend32(a{m}.word[2*i+1]) * SignExtend32(Cast_Int16(t.word[1]))
+		dst.dword[i] := Saturate32(dst.dword[i] + p1.dword + p2.dword)
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VP4DPWSSDS" form="zmm, zmm, m128" xed="VP4DPWSSDS_ZMMi32_MASKmskw_ZMMi16_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_4dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_4VNNIW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a0" etype="SI16"/>
+	<parameter type="__m512i" varname="a1" etype="SI16"/>
+	<parameter type="__m512i" varname="a2" etype="SI16"/>
+	<parameter type="__m512i" varname="a3" etype="SI16"/>
+	<parameter type="__m128i *" varname="b" etype="SI16" memwidth="128"/>
+	<description>Compute 4 sequential operand source-block dot-products of two signed 16-bit element operands with 32-bit element accumulation with mask and signed saturation, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	IF k[i]
+		FOR m := 0 to 3
+			lim_base := b + m*32
+			t.dword  := MEM[lim_base+31:lim_base]
+			p1.dword := SignExtend32(a{m}.word[2*i+0]) * SignExtend32(Cast_Int16(t.word[0]))
+			p2.dword := SignExtend32(a{m}.word[2*i+1]) * SignExtend32(Cast_Int16(t.word[1]))
+			dst.dword[i] := Saturate32(dst.dword[i] + p1.dword + p2.dword)
+		ENDFOR
+	ELSE
+		dst.dword[i] := src.dword[i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VP4DPWSSDS" form="zmm {k}, zmm, m128" xed="VP4DPWSSDS_ZMMi32_MASKmskw_ZMMi16_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_4dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_4VNNIW</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a0" etype="SI16"/>
+	<parameter type="__m512i" varname="a1" etype="SI16"/>
+	<parameter type="__m512i" varname="a2" etype="SI16"/>
+	<parameter type="__m512i" varname="a3" etype="SI16"/>
+	<parameter type="__m128i *" varname="b" etype="SI16" memwidth="128"/>
+	<description>Compute 4 sequential operand source-block dot-products of two signed 16-bit element operands with 32-bit element accumulation with mask and signed saturation, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+FOR i := 0 to 15
+	IF k[i]
+		FOR m := 0 to 3
+			lim_base := b + m*32
+			t.dword  := MEM[lim_base+31:lim_base]
+			p1.dword := SignExtend32(a{m}.word[2*i+0]) * SignExtend32(Cast_Int16(t.word[0]))
+			p2.dword := SignExtend32(a{m}.word[2*i+1]) * SignExtend32(Cast_Int16(t.word[1]))
+			dst.dword[i] := Saturate32(dst.dword[i] + p1.dword + p2.dword)
+		ENDFOR
+	ELSE
+		dst.dword[i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VP4DPWSSDS" form="zmm {z}, zmm, m128" xed="VP4DPWSSDS_ZMMi32_MASKmskw_ZMMi16_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	IF j &lt; 4
+		t := b.fp32[j]
+	ELSE
+		t := a.fp32[j-4]
+	FI
+	dst.word[j] := Convert_FP32_To_BF16(t)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="xmm, xmm, xmm" xed="VCVTNE2PS2BF16_XMMbf16_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__m128bh" varname="src" etype="BF16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		IF j &lt; 4
+			t := b.fp32[j]
+		ELSE
+			t := a.fp32[j-4]
+		FI
+		dst.word[j] := Convert_FP32_To_BF16(t)
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="xmm {k}, xmm, xmm" xed="VCVTNE2PS2BF16_XMMbf16_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		IF j &lt; 4
+			t := b.fp32[j]
+		ELSE
+			t := a.fp32[j-4]
+		FI
+		dst.word[j] := Convert_FP32_To_BF16(t)
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="xmm {z}, xmm, xmm" xed="VCVTNE2PS2BF16_XMMbf16_MASKmskw_XMMf32_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m256bh" varname="dst" etype="BF16"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	IF j &lt; 8
+		t := b.fp32[j]
+	ELSE
+		t := a.fp32[j-8]
+	FI
+	dst.word[j] := Convert_FP32_To_BF16(t)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="ymm, ymm, ymm" xed="VCVTNE2PS2BF16_YMMbf16_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m256bh" varname="dst" etype="BF16"/>
+	<parameter type="__m256bh" varname="src" etype="BF16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		IF j &lt; 8
+			t := b.fp32[j]
+		ELSE
+			t := a.fp32[j-8]
+		FI
+		dst.word[j] := Convert_FP32_To_BF16(t)
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="ymm {k}, ymm, ymm" xed="VCVTNE2PS2BF16_YMMbf16_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m256bh" varname="dst" etype="BF16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		IF j &lt; 8
+			t := b.fp32[j]
+		ELSE
+			t := a.fp32[j-8]
+		FI
+		dst.word[j] := Convert_FP32_To_BF16(t)
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="ymm {z}, ymm, ymm" xed="VCVTNE2PS2BF16_YMMbf16_MASKmskw_YMMf32_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512bh" varname="dst" etype="BF16"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	IF j &lt; 16
+		t := b.fp32[j]
+	ELSE
+		t := a.fp32[j-16]
+	FI
+	dst.word[j] := Convert_FP32_To_BF16(t)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="zmm, zmm, zmm" xed="VCVTNE2PS2BF16_ZMMbf16_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512bh" varname="dst" etype="BF16"/>
+	<parameter type="__m512bh" varname="src" etype="BF16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	IF k[j]
+		IF j &lt; 16
+			t := b.fp32[j]
+		ELSE
+			t := a.fp32[j-16]
+		FI
+		dst.word[j] := Convert_FP32_To_BF16(t)
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="zmm {k}, zmm, zmm" xed="VCVTNE2PS2BF16_ZMMbf16_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtne2ps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m512bh" varname="dst" etype="BF16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in two vectors "a" and "b" to packed BF16 (16-bit) floating-point elements, and store the results in single vector "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	IF k[j]
+		IF j &lt; 16
+			t := b.fp32[j]
+		ELSE
+			t := a.fp32[j-16]
+		FI
+		dst.word[j] := Convert_FP32_To_BF16(t)
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTNE2PS2BF16" form="zmm {z}, zmm, zmm" xed="VCVTNE2PS2BF16_ZMMbf16_MASKmskw_ZMMf32_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="xmm, xmm" xed="VCVTNEPS2BF16_XMMbf16_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__m128bh" varname="src" etype="BF16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="xmm {k}, xmm" xed="VCVTNEPS2BF16_XMMbf16_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="xmm {z}, xmm" xed="VCVTNEPS2BF16_XMMbf16_MASKmskw_XMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="xmm, ymm" xed="VCVTNEPS2BF16_XMMbf16_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__m128bh" varname="src" etype="BF16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="xmm {k}, ymm" xed="VCVTNEPS2BF16_XMMbf16_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Convert</category>
+	<return type="__m128bh" varname="dst" etype="BF16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="xmm {z}, ymm" xed="VCVTNEPS2BF16_XMMbf16_MASKmskw_YMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256bh" varname="dst" etype="BF16"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="ymm, zmm" xed="VCVTNEPS2BF16_YMMbf16_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256bh" varname="dst" etype="BF16"/>
+	<parameter type="__m256bh" varname="src" etype="BF16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+	ELSE
+		dst.word[j] := src.word[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="ymm {k}, zmm" xed="VCVTNEPS2BF16_YMMbf16_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_cvtneps_pbh">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Convert</category>
+	<return type="__m256bh" varname="dst" etype="BF16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed BF16 (16-bit) floating-point elements, and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		dst.word[j] := Convert_FP32_To_BF16(a.fp32[j])
+	ELSE
+		dst.word[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTNEPS2BF16" form="ymm {z}, zmm" xed="VCVTNEPS2BF16_YMMbf16_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__m128bh" varname="a" etype="BF16"/>
+	<parameter type="__m128bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst".</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 3
+	dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+	dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="xmm, xmm, xmm" xed="VDPBF16PS_XMMf32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128bh" varname="a" etype="BF16"/>
+	<parameter type="__m128bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 3
+	IF k[j]
+		dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+		dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="xmm {k}, xmm, xmm" xed="VDPBF16PS_XMMf32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128" varname="src" etype="FP32"/>
+	<parameter type="__m128bh" varname="a" etype="BF16"/>
+	<parameter type="__m128bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 3
+	IF k[j]
+		dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+		dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="xmm {z}, xmm, xmm" xed="VDPBF16PS_XMMf32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__m256bh" varname="a" etype="BF16"/>
+	<parameter type="__m256bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst".</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 7
+	dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+	dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="ymm, ymm, ymm" xed="VDPBF16PS_YMMf32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256bh" varname="a" etype="BF16"/>
+	<parameter type="__m256bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 7
+	IF k[j]
+		dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+		dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="ymm {k}, ymm, ymm" xed="VDPBF16PS_YMMf32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256" varname="src" etype="FP32"/>
+	<parameter type="__m256bh" varname="a" etype="BF16"/>
+	<parameter type="__m256bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 7
+	IF k[j]
+		dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+		dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="ymm {z}, ymm, ymm" xed="VDPBF16PS_YMMf32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__m512bh" varname="a" etype="BF16"/>
+	<parameter type="__m512bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst".</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 15
+	dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+	dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="zmm, zmm, zmm" xed="VDPBF16PS_ZMMf32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512bh" varname="a" etype="BF16"/>
+	<parameter type="__m512bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 15
+	IF k[j]
+		dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+		dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="zmm {k}, zmm, zmm" xed="VDPBF16PS_ZMMf32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_dpbf16_ps">
+	<type>Floating Point</type>
+	<CPUID>AVX512_BF16</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__m512bh" varname="a" etype="BF16"/>
+	<parameter type="__m512bh" varname="b" etype="BF16"/>
+	<description>Compute dot-product of BF16 (16-bit) floating-point pairs in "a" and "b", accumulating the intermediate single-precision (32-bit) floating-point elements with elements in "src", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE make_fp32(x[15:0]) {
+	y.fp32  := 0.0
+	y[31:16] := x[15:0]
+	RETURN y
+}
+dst := src
+FOR j := 0 to 15
+	IF k[j]
+		dst.fp32[j] += make_fp32(a.bf16[2*j+1]) * make_fp32(b.bf16[2*j+1])
+		dst.fp32[j] += make_fp32(a.bf16[2*j+0]) * make_fp32(b.bf16[2*j+0])
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VDPBF16PS" form="zmm {z}, zmm, zmm" xed="VDPBF16PS_ZMMf32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_bitshuffle_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__mmask64" varname="dst" etype="MASK"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 7 //Qword
+	FOR j := 0 to 7 // Byte
+		IF k[i*8+j]
+			m := c.qword[i].byte[j] &amp; 0x3F
+			dst[i*8+j] := b.qword[i].bit[m]
+		ELSE
+			dst[i*8+j] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPSHUFBITQMB" form="k {k}, zmm, zmm" xed="VPSHUFBITQMB_MASKmskw_MASKmskw_ZMMu64_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_bitshuffle_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__mmask64" varname="dst" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Gather 64 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst".</description>
+	<operation>
+FOR i := 0 to 7 //Qword
+	FOR j := 0 to 7 // Byte
+		m := c.qword[i].byte[j] &amp; 0x3F
+		dst[i*8+j] := b.qword[i].bit[m]
+	ENDFOR
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VPSHUFBITQMB" form="k, zmm, zmm" xed="VPSHUFBITQMB_MASKmskw_MASKmskw_ZMMu64_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_bitshuffle_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__mmask32" varname="dst" etype="MASK"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Gather 32 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 3 //Qword
+	FOR j := 0 to 7 // Byte
+		IF k[i*8+j]
+			m := c.qword[i].byte[j] &amp; 0x3F
+			dst[i*8+j] := b.qword[i].bit[m]
+		ELSE
+			dst[i*8+j] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPSHUFBITQMB" form="k {k}, ymm, ymm" xed="VPSHUFBITQMB_MASKmskw_MASKmskw_YMMu64_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_bitshuffle_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__mmask32" varname="dst" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Gather 32 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst".</description>
+	<operation>
+FOR i := 0 to 3 //Qword
+	FOR j := 0 to 7 // Byte
+		m := c.qword[i].byte[j] &amp; 0x3F
+		dst[i*8+j] := b.qword[i].bit[m]
+	ENDFOR
+ENDFOR
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="VPSHUFBITQMB" form="k, ymm, ymm" xed="VPSHUFBITQMB_MASKmskw_MASKmskw_YMMu64_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_bitshuffle_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Gather 16 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 1 //Qword
+	FOR j := 0 to 7 // Byte
+		IF k[i*8+j]
+			m := c.qword[i].byte[j] &amp; 0x3F
+			dst[i*8+j] := b.qword[i].bit[m]
+		ELSE
+			dst[i*8+j] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPSHUFBITQMB" form="k {k}, xmm, xmm" xed="VPSHUFBITQMB_MASKmskw_MASKmskw_XMMu64_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_bitshuffle_epi64_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Gather 16 bits from "b" using selection bits in "c". For each 64-bit element in "b", gather 8 bits from the 64-bit element in "b" at 8 bit position controlled by the 8 corresponding 8-bit elements of "c", and store the result in the corresponding 8-bit element of "dst".</description>
+	<operation>
+FOR i := 0 to 1 //Qword
+	FOR j := 0 to 7 // Byte
+		m := c.qword[i].byte[j] &amp; 0x3F
+		dst[i*8+j] := b.qword[i].bit[m]
+	ENDFOR
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="VPSHUFBITQMB" form="k, xmm, xmm" xed="VPSHUFBITQMB_MASKmskw_MASKmskw_XMMu64_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := POPCNT(a[i+15:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="zmm, zmm" xed="VPOPCNTW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := POPCNT(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="zmm {k}, zmm" xed="VPOPCNTW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := POPCNT(a[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="zmm {z}, zmm" xed="VPOPCNTW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := POPCNT(a[i+15:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="ymm, ymm" xed="VPOPCNTW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_mask_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := POPCNT(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="ymm {k}, ymm" xed="VPOPCNTW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_maskz_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := POPCNT(a[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="ymm {z}, ymm" xed="VPOPCNTW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := POPCNT(a[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="xmm, xmm" xed="VPOPCNTW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_mask_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := POPCNT(a[i+15:i])
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="xmm {k}, xmm" xed="VPOPCNTW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_maskz_popcnt_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Count the number of logical 1 bits in packed 16-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := POPCNT(a[i+15:i])
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTW" form="xmm {z}, xmm" xed="VPOPCNTW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 63
+	i := j*8
+	dst[i+7:i] := POPCNT(a[i+7:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="zmm, zmm" xed="VPOPCNTB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_mask_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := POPCNT(a[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="zmm {k}, zmm" xed="VPOPCNTB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm512_maskz_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := POPCNT(a[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="zmm {z}, zmm" xed="VPOPCNTB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 31
+	i := j*8
+	dst[i+7:i] := POPCNT(a[i+7:i])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="ymm, ymm" xed="VPOPCNTB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_mask_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := POPCNT(a[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="ymm {k}, ymm" xed="VPOPCNTB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm256_maskz_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := POPCNT(a[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="ymm {z}, ymm" xed="VPOPCNTB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := POPCNT(a[i+7:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="xmm, xmm" xed="VPOPCNTB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_mask_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := POPCNT(a[i+7:i])
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="xmm {k}, xmm" xed="VPOPCNTB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" vexEq="TRUE" name="_mm_maskz_popcnt_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_BITALG</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Count the number of logical 1 bits in packed 8-bit integers in "a", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE POPCNT(a) {
+	count := 0
+	DO WHILE a &gt; 0
+		count += a[0]
+		a &gt;&gt;= 1
+	OD
+	RETURN count
+}
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := POPCNT(a[i+7:i])
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPOPCNTB" form="xmm {z}, xmm" xed="VPOPCNTB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst".</description>
+	<operation>
+FOR i := 0 to 7
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		dst[q+j*8+7:q+j*8] := tmp8[7:0]
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="zmm, zmm, zmm" xed="VPMULTISHIFTQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 7
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		IF k[i*8+j]
+			dst[q+j*8+7:q+j*8] := tmp8[7:0]
+		ELSE
+			dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="zmm {k}, zmm, zmm" xed="VPMULTISHIFTQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 7
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		IF k[i*8+j]
+			dst[q+j*8+7:q+j*8] := tmp8[7:0]
+		ELSE
+			dst[q+j*8+7:q+j*8] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="zmm {z}, zmm, zmm" xed="VPMULTISHIFTQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst".</description>
+	<operation>
+FOR i := 0 to 3
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		dst[q+j*8+7:q+j*8] := tmp8[7:0]
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="ymm, ymm, ymm" xed="VPMULTISHIFTQB_YMMu8_MASKmskw_YMMu8_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 3
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		IF k[i*8+j]
+			dst[q+j*8+7:q+j*8] := tmp8[7:0]
+		ELSE
+			dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="ymm {k}, ymm, ymm" xed="VPMULTISHIFTQB_YMMu8_MASKmskw_YMMu8_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 3
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		IF k[i*8+j]
+			dst[q+j*8+7:q+j*8] := tmp8[7:0]
+		ELSE
+			dst[q+j*8+7:q+j*8] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="ymm {z}, ymm, ymm" xed="VPMULTISHIFTQB_YMMu8_MASKmskw_YMMu8_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst".</description>
+	<operation>
+FOR i := 0 to 1
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		dst[q+j*8+7:q+j*8] := tmp8[7:0]
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="xmm, xmm, xmm" xed="VPMULTISHIFTQB_XMMu8_MASKmskw_XMMu8_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 1
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		IF k[i*8+j]
+			dst[q+j*8+7:q+j*8] := tmp8[7:0]
+		ELSE
+			dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="xmm {k}, xmm, xmm" xed="VPMULTISHIFTQB_XMMu8_MASKmskw_XMMu8_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_multishift_epi64_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>For each 64-bit element in "b", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of "a", and store the 8 assembled bytes to the corresponding 64-bit element of "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR i := 0 to 1
+	q := i * 64
+	FOR j := 0 to 7
+		tmp8 := 0
+		ctrl := a[q+j*8+7:q+j*8] &amp; 63
+		FOR l := 0 to 7
+			tmp8[l] := b[q+((ctrl+l) &amp; 63)]
+		ENDFOR
+		IF k[i*8+j]
+			dst[q+j*8+7:q+j*8] := tmp8[7:0]
+		ELSE
+			dst[q+j*8+7:q+j*8] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPMULTISHIFTQB" form="xmm {z}, xmm, xmm" xed="VPMULTISHIFTQB_XMMu8_MASKmskw_XMMu8_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	id := idx[i+5:i]*8
+	dst[i+7:i] := a[id+7:id]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMB" form="zmm, zmm, zmm" xed="VPERMB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	id := idx[i+5:i]*8
+	IF k[j]
+		dst[i+7:i] := a[id+7:id]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMB" form="zmm {k}, zmm, zmm" xed="VPERMB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	id := idx[i+5:i]*8
+	IF k[j]
+		dst[i+7:i] := a[id+7:id]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMB" form="zmm {z}, zmm, zmm" xed="VPERMB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	id := idx[i+4:i]*8
+	dst[i+7:i] := a[id+7:id]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMB" form="ymm, ymm, ymm" xed="VPERMB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	id := idx[i+4:i]*8
+	IF k[j]
+		dst[i+7:i] := a[id+7:id]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMB" form="ymm {k}, ymm, ymm" xed="VPERMB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	id := idx[i+4:i]*8
+	IF k[j]
+		dst[i+7:i] := a[id+7:id]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMB" form="ymm {z}, ymm, ymm" xed="VPERMB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	id := idx[i+3:i]*8
+	dst[i+7:i] := a[id+7:id]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMB" form="xmm, xmm, xmm" xed="VPERMB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	id := idx[i+3:i]*8
+	IF k[j]
+		dst[i+7:i] := a[id+7:id]
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMB" form="xmm {k}, xmm, xmm" xed="VPERMB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutexvar_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" using the corresponding index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	id := idx[i+3:i]*8
+	IF k[j]
+		dst[i+7:i] := a[id+7:id]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMB" form="xmm {z}, xmm, xmm" xed="VPERMB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	off := 8*idx[i+5:i]
+	dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="zmm, zmm, zmm" xed="VPERMI2B_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+5:i]
+		dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMT2B" form="zmm {k}, zmm, zmm" xed="VPERMT2B_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask2_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+5:i]
+		dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := idx[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="zmm {k}, zmm, zmm" xed="VPERMI2B_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="idx" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+5:i]
+		dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="zmm {z}, zmm, zmm" xed="VPERMI2B_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<instruction name="VPERMT2B" form="zmm {z}, zmm, zmm" xed="VPERMT2B_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	off := 8*idx[i+4:i]
+	dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="ymm, ymm, ymm" xed="VPERMI2B_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+4:i]
+		dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMT2B" form="ymm {k}, ymm, ymm" xed="VPERMT2B_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask2_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+4:i]
+		dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := idx[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="ymm {k}, ymm, ymm" xed="VPERMI2B_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="idx" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" across lanes using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+4:i]
+		dst[i+7:i] := idx[i+5] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="ymm {z}, ymm, ymm" xed="VPERMI2B_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<instruction name="VPERMT2B" form="ymm {z}, ymm, ymm" xed="VPERMT2B_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	off := 8*idx[i+3:i]
+	dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="xmm, xmm, xmm" xed="VPERMI2B_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+3:i]
+		dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMT2B" form="xmm {k}, xmm, xmm" xed="VPERMT2B_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask2_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using writemask "k" (elements are copied from "idx" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+3:i]
+		dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := idx[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="xmm {k}, xmm, xmm" xed="VPERMI2B_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_permutex2var_epi8">
+	<CPUID>AVX512_VBMI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="idx" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle 8-bit integers in "a" and "b" using the corresponding selector and index in "idx", and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		off := 8*idx[i+3:i]
+		dst[i+7:i] := idx[i+4] ? b[off+7:off] : a[off+7:off]
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPERMI2B" form="xmm {z}, xmm, xmm" xed="VPERMI2B_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<instruction name="VPERMT2B" form="xmm {z}, xmm, xmm" xed="VPERMT2B_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="zmm {z}, zmm, zmm" xed="VPSHRDVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="zmm {k}, zmm, zmm" xed="VPSHRDVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="zmm, zmm, zmm" xed="VPSHRDVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="ymm {z}, ymm, ymm" xed="VPSHRDVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="ymm {k}, ymm, ymm" xed="VPSHRDVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="ymm, ymm, ymm" xed="VPSHRDVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="xmm {z}, xmm, xmm" xed="VPSHRDVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="xmm {k}, xmm, xmm" xed="VPSHRDVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shrdv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; (c[i+63:i] &amp; 63)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVQ" form="xmm, xmm, xmm" xed="VPSHRDVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="zmm {z}, zmm, zmm" xed="VPSHRDVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="zmm {k}, zmm, zmm" xed="VPSHRDVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="zmm, zmm, zmm" xed="VPSHRDVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="ymm {z}, ymm, ymm" xed="VPSHRDVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="ymm {k}, ymm, ymm" xed="VPSHRDVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="ymm, ymm, ymm" xed="VPSHRDVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="xmm {z}, xmm, xmm" xed="VPSHRDVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="xmm {k}, xmm, xmm" xed="VPSHRDVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shrdv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; (c[i+31:i] &amp; 31)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVD" form="xmm, xmm, xmm" xed="VPSHRDVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="__m512i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="zmm {z}, zmm, zmm" xed="VPSHRDVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="__m512i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="zmm {k}, zmm, zmm" xed="VPSHRDVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="__m512i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="zmm, zmm, zmm" xed="VPSHRDVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="__m256i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="ymm {z}, ymm, ymm" xed="VPSHRDVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="__m256i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="ymm {k}, ymm, ymm" xed="VPSHRDVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="__m256i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="ymm, ymm, ymm" xed="VPSHRDVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="__m128i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="xmm {z}, xmm, xmm" xed="VPSHRDVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="__m128i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="xmm {k}, xmm, xmm" xed="VPSHRDVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shrdv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="__m128i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of "c", and store the lower 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; (c[i+15:i] &amp; 15)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDVW" form="xmm, xmm, xmm" xed="VPSHRDVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="zmm {z}, zmm, zmm, imm8" xed="VPSHRDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="zmm {k}, zmm, zmm, imm8" xed="VPSHRDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="zmm, zmm, zmm, imm8" xed="VPSHRDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="ymm {z}, ymm, ymm, imm8" xed="VPSHRDQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="ymm {k}, ymm, ymm, imm8" xed="VPSHRDQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="ymm, ymm, ymm, imm8" xed="VPSHRDQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="xmm {z}, xmm, xmm, imm8" xed="VPSHRDQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="xmm {k}, xmm, xmm, imm8" xed="VPSHRDQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shrdi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "b" and "a" producing an intermediate 128-bit result. Shift the result right by "imm8" bits, and store the lower 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ((b[i+63:i] &lt;&lt; 64)[127:0] | a[i+63:i]) &gt;&gt; imm8[5:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDQ" form="xmm, xmm, xmm, imm8" xed="VPSHRDQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="zmm {z}, zmm, zmm, imm8" xed="VPSHRDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="zmm {k}, zmm, zmm, imm8" xed="VPSHRDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="zmm, zmm, zmm, imm8" xed="VPSHRDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="ymm {z}, ymm, ymm, imm8" xed="VPSHRDD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="ymm {k}, ymm, ymm, imm8" xed="VPSHRDD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="ymm, ymm, ymm, imm8" xed="VPSHRDD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="xmm {z}, xmm, xmm, imm8" xed="VPSHRDD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="xmm {k}, xmm, xmm, imm8" xed="VPSHRDD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shrdi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "b" and "a" producing an intermediate 64-bit result. Shift the result right by "imm8" bits, and store the lower 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ((b[i+31:i] &lt;&lt; 32)[63:0] | a[i+31:i]) &gt;&gt; imm8[4:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDD" form="xmm, xmm, xmm, imm8" xed="VPSHRDD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="zmm {z}, zmm, zmm, imm8" xed="VPSHRDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="zmm {k}, zmm, zmm, imm8" xed="VPSHRDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="zmm, zmm, zmm, imm8" xed="VPSHRDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="ymm {z}, ymm, ymm, imm8" xed="VPSHRDW_YMMu16_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="ymm {k}, ymm, ymm, imm8" xed="VPSHRDW_YMMu16_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="ymm, ymm, ymm, imm8" xed="VPSHRDW_YMMu16_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="xmm {z}, xmm, xmm, imm8" xed="VPSHRDW_XMMu16_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="xmm {k}, xmm, xmm, imm8" xed="VPSHRDW_XMMu16_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shrdi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "b" and "a" producing an intermediate 32-bit result. Shift the result right by "imm8" bits, and store the lower 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := ((b[i+15:i] &lt;&lt; 16)[31:0] | a[i+15:i]) &gt;&gt; imm8[3:0]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHRDW" form="xmm, xmm, xmm, imm8" xed="VPSHRDW_XMMu16_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="zmm {z}, zmm, zmm" xed="VPSHLDVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="zmm {k}, zmm, zmm" xed="VPSHLDVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__m512i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+	dst[i+63:i] := tmp[127:64]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="zmm, zmm, zmm" xed="VPSHLDVQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="ymm {z}, ymm, ymm" xed="VPSHLDVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="ymm {k}, ymm, ymm" xed="VPSHLDVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__m256i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+	dst[i+63:i] := tmp[127:64]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="ymm, ymm, ymm" xed="VPSHLDVQ_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="xmm {z}, xmm, xmm" xed="VPSHLDVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="xmm {k}, xmm, xmm" xed="VPSHLDVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shldv_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__m128i" varname="c" etype="UI64"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; (c[i+63:i] &amp; 63)
+	dst[i+63:i] := tmp[127:64]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVQ" form="xmm, xmm, xmm" xed="VPSHLDVQ_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="zmm {z}, zmm, zmm" xed="VPSHLDVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="zmm {k}, zmm, zmm" xed="VPSHLDVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+	dst[i+31:i] := tmp[63:32]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="zmm, zmm, zmm" xed="VPSHLDVD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="ymm {z}, ymm, ymm" xed="VPSHLDVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="ymm {k}, ymm, ymm" xed="VPSHLDVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__m256i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+	dst[i+31:i] := tmp[63:32]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="ymm, ymm, ymm" xed="VPSHLDVD_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="xmm {z}, xmm, xmm" xed="VPSHLDVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="xmm {k}, xmm, xmm" xed="VPSHLDVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shldv_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="c" etype="UI32"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; (c[i+31:i] &amp; 31)
+	dst[i+31:i] := tmp[63:32]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVD" form="xmm, xmm, xmm" xed="VPSHLDVD_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="__m512i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="zmm {z}, zmm, zmm" xed="VPSHLDVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="__m512i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="zmm {k}, zmm, zmm" xed="VPSHLDVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="__m512i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="zmm, zmm, zmm" xed="VPSHLDVW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="__m256i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="ymm {z}, ymm, ymm" xed="VPSHLDVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="__m256i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="ymm {k}, ymm, ymm" xed="VPSHLDVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="__m256i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="ymm, ymm, ymm" xed="VPSHLDVW_YMMu16_MASKmskw_YMMu16_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="__m128i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="xmm {z}, xmm, xmm" xed="VPSHLDVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="__m128i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="xmm {k}, xmm, xmm" xed="VPSHLDVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shldv_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="__m128i" varname="c" etype="UI16"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of "c", and store the upper 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; (c[i+15:i] &amp; 15)
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDVW" form="xmm, xmm, xmm" xed="VPSHLDVW_XMMu16_MASKmskw_XMMu16_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="zmm {z}, zmm, zmm, imm8" xed="VPSHLDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="zmm {k}, zmm, zmm, imm8" xed="VPSHLDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+	dst[i+63:i] := tmp[127:64]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="zmm, zmm, zmm, imm8" xed="VPSHLDQ_ZMMu64_MASKmskw_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="ymm {z}, ymm, ymm, imm8" xed="VPSHLDQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="ymm {k}, ymm, ymm, imm8" xed="VPSHLDQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+	dst[i+63:i] := tmp[127:64]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="ymm, ymm, ymm, imm8" xed="VPSHLDQ_YMMu64_MASKmskw_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="xmm {z}, xmm, xmm, imm8" xed="VPSHLDQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF k[j]
+		tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+		dst[i+63:i] := tmp[127:64]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="xmm {k}, xmm, xmm, imm8" xed="VPSHLDQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shldi_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 64-bit integers in "a" and "b" producing an intermediate 128-bit result. Shift the result left by "imm8" bits, and store the upper 64-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	tmp[127:0] := ((a[i+63:i] &lt;&lt; 64)[127:0] | b[i+63:i]) &lt;&lt; imm8[5:0]
+	dst[i+63:i] := tmp[127:64]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDQ" form="xmm, xmm, xmm, imm8" xed="VPSHLDQ_XMMu64_MASKmskw_XMMu64_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="zmm {z}, zmm, zmm, imm8" xed="VPSHLDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="zmm {k}, zmm, zmm, imm8" xed="VPSHLDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+	dst[i+31:i] := tmp[63:32]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="zmm, zmm, zmm, imm8" xed="VPSHLDD_ZMMu32_MASKmskw_ZMMu32_ZMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="ymm {z}, ymm, ymm, imm8" xed="VPSHLDD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="ymm {k}, ymm, ymm, imm8" xed="VPSHLDD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI32"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+	dst[i+31:i] := tmp[63:32]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="ymm, ymm, ymm, imm8" xed="VPSHLDD_YMMu32_MASKmskw_YMMu32_YMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="xmm {z}, xmm, xmm, imm8" xed="VPSHLDD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF k[j]
+		tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+		dst[i+31:i] := tmp[63:32]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="xmm {k}, xmm, xmm, imm8" xed="VPSHLDD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shldi_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 32-bit integers in "a" and "b" producing an intermediate 64-bit result. Shift the result left by "imm8" bits, and store the upper 32-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	tmp[63:0] := ((a[i+31:i] &lt;&lt; 32)[63:0] | b[i+31:i]) &lt;&lt; imm8[4:0]
+	dst[i+31:i] := tmp[63:32]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDD" form="xmm, xmm, xmm, imm8" xed="VPSHLDD_XMMu32_MASKmskw_XMMu32_XMMu32_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="zmm {z}, zmm, zmm, imm8" xed="VPSHLDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="zmm {k}, zmm, zmm, imm8" xed="VPSHLDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Shift</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<parameter type="__m512i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 31
+	i := j*16
+	tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="zmm, zmm, zmm, imm8" xed="VPSHLDW_ZMMu16_MASKmskw_ZMMu16_ZMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="ymm {z}, ymm, ymm, imm8" xed="VPSHLDW_YMMu16_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="ymm {k}, ymm, ymm, imm8" xed="VPSHLDW_YMMu16_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<parameter type="__m256i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*16
+	tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="ymm, ymm, ymm, imm8" xed="VPSHLDW_YMMu16_MASKmskw_YMMu16_YMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="xmm {z}, xmm, xmm, imm8" xed="VPSHLDW_XMMu16_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+		dst[i+15:i] := tmp[31:16]
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="xmm {k}, xmm, xmm, imm8" xed="VPSHLDW_XMMu16_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_shldi_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Concatenate packed 16-bit integers in "a" and "b" producing an intermediate 32-bit result. Shift the result left by "imm8" bits, and store the upper 16-bits in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	tmp[31:0] := ((a[i+15:i] &lt;&lt; 16)[31:0] | b[i+15:i]) &lt;&lt; imm8[3:0]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPSHLDW" form="xmm, xmm, xmm, imm8" xed="VPSHLDW_XMMu16_MASKmskw_XMMu16_XMMu16_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expandloadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<description>Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="zmm {z}, m512" xed="VPEXPANDW_ZMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expandloadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI16" memwidth="512"/>
+	<description>Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="zmm {k}, m512" xed="VPEXPANDW_ZMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expand_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[m+15:m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="zmm {z}, zmm" xed="VPEXPANDW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expand_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[m+15:m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="zmm {k}, zmm" xed="VPEXPANDW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expandloadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<description>Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="ymm {z}, m256" xed="VPEXPANDW_YMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expandloadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI16" memwidth="256"/>
+	<description>Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="ymm {k}, m256" xed="VPEXPANDW_YMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expand_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[m+15:m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="ymm {z}, ymm" xed="VPEXPANDW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expand_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[m+15:m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="ymm {k}, ymm" xed="VPEXPANDW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expandloadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<description>Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="xmm {z}, m128" xed="VPEXPANDW_XMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expandloadu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI16" memwidth="128"/>
+	<description>Load contiguous active 16-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := MEM[mem_addr+m+15:mem_addr+m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="xmm {k}, m128" xed="VPEXPANDW_XMMu16_MASKmskw_MEMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expand_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[m+15:m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="xmm {z}, xmm" xed="VPEXPANDW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expand_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Load contiguous active 16-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[i+15:i] := a[m+15:m]
+		m := m + 16
+	ELSE
+		dst[i+15:i] := src[i+15:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDW" form="xmm {k}, xmm" xed="VPEXPANDW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expandloadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<description>Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="zmm {z}, m512" xed="VPEXPANDB_ZMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expandloadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI8" memwidth="512"/>
+	<description>Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="zmm {k}, m512" xed="VPEXPANDB_ZMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expandloadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<description>Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="ymm {z}, m256" xed="VPEXPANDB_YMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expandloadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI8" memwidth="256"/>
+	<description>Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="ymm {k}, m256" xed="VPEXPANDB_YMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expandloadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<description>Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="xmm {z}, m128" xed="VPEXPANDB_XMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expandloadu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Load</category>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="const void*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<description>Load contiguous active 8-bit integers from unaligned memory at "mem_addr" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := MEM[mem_addr+m+7:mem_addr+m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="xmm {k}, m128" xed="VPEXPANDB_XMMu8_MASKmskw_MEMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_expand_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[m+7:m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="zmm {z}, zmm" xed="VPEXPANDB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_expand_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[m+7:m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="zmm {k}, zmm" xed="VPEXPANDB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_expand_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[m+7:m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="ymm {z}, ymm" xed="VPEXPANDB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_expand_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[m+7:m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="ymm {k}, ymm" xed="VPEXPANDB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_expand_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[m+7:m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="xmm {z}, xmm" xed="VPEXPANDB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_expand_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Load contiguous active 8-bit integers from "a" (those with their respective bit set in mask "k"), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+m := 0
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[i+7:i] := a[m+7:m]
+		m := m + 8
+	ELSE
+		dst[i+7:i] := src[i+7:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPEXPANDB" form="xmm {k}, xmm" xed="VPEXPANDB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compressstoreu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="512"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 16
+m := base_addr
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		MEM[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSW" form="m512 {k}, zmm" xed="VPCOMPRESSW_MEMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compressstoreu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="256"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 16
+m := base_addr
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		MEM[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSW" form="m256 {k}, ymm" xed="VPCOMPRESSW_MEMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compressstoreu_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI16" memwidth="128"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 16
+m := base_addr
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		MEM[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSW" form="m128 {k}, xmm" xed="VPCOMPRESSW_MEMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_compress_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 16
+m := 0
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := 0
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSW" form="zmm {z}, zmm" xed="VPCOMPRESSW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compress_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI16"/>
+	<parameter type="__m512i" varname="src" etype="UI16"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 16
+m := 0
+FOR j := 0 to 31
+	i := j*16
+	IF k[j]
+		dst[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := src[511:m]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSW" form="zmm {k}, zmm" xed="VPCOMPRESSW_ZMMu16_MASKmskw_ZMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_compress_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 16
+m := 0
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSW" form="ymm {z}, ymm" xed="VPCOMPRESSW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compress_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI16"/>
+	<parameter type="__m256i" varname="src" etype="UI16"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 16
+m := 0
+FOR j := 0 to 15
+	i := j*16
+	IF k[j]
+		dst[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := src[255:m]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSW" form="ymm {k}, ymm" xed="VPCOMPRESSW_YMMu16_MASKmskw_YMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_compress_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 16
+m := 0
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSW" form="xmm {z}, xmm" xed="VPCOMPRESSW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compress_epi16">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="src" etype="UI16"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Contiguously store the active 16-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 16
+m := 0
+FOR j := 0 to 7
+	i := j*16
+	IF k[j]
+		dst[m+size-1:m] := a[i+15:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := src[127:m]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSW" form="xmm {k}, xmm" xed="VPCOMPRESSW_XMMu16_MASKmskw_XMMu16_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compressstoreu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="512"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 8
+m := base_addr
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		MEM[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSB" form="m512 {k}, zmm" xed="VPCOMPRESSB_MEMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compressstoreu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="256"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 8
+m := base_addr
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		MEM[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSB" form="m256 {k}, ymm" xed="VPCOMPRESSB_MEMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compressstoreu_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Store</category>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI8" memwidth="128"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to unaligned memory at "base_addr".</description>
+	<operation>
+size := 8
+m := base_addr
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		MEM[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPCOMPRESSB" form="m128 {k}, xmm" xed="VPCOMPRESSB_MEMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_compress_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 8
+m := 0
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := 0
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSB" form="zmm {z}, zmm" xed="VPCOMPRESSB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_compress_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 8
+m := 0
+FOR j := 0 to 63
+	i := j*8
+	IF k[j]
+		dst[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+dst[511:m] := src[511:m]
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCOMPRESSB" form="zmm {k}, zmm" xed="VPCOMPRESSB_ZMMu8_MASKmskw_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_compress_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 8
+m := 0
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := 0
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSB" form="ymm {z}, ymm" xed="VPCOMPRESSB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_compress_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 8
+m := 0
+FOR j := 0 to 31
+	i := j*8
+	IF k[j]
+		dst[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+dst[255:m] := src[255:m]
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCOMPRESSB" form="ymm {k}, ymm" xed="VPCOMPRESSB_YMMu8_MASKmskw_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_compress_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in zeromask "k") to "dst", and set the remaining elements to zero.</description>
+	<operation>
+size := 8
+m := 0
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := 0
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSB" form="xmm {z}, xmm" xed="VPCOMPRESSB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_compress_epi8">
+	<type>Integer</type>
+	<CPUID>AVX512_VBMI2</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Contiguously store the active 8-bit integers in "a" (those with their respective bit set in writemask "k") to "dst", and pass through the remaining elements from "src".</description>
+	<operation>
+size := 8
+m := 0
+FOR j := 0 to 15
+	i := j*8
+	IF k[j]
+		dst[m+size-1:m] := a[i+7:i]
+		m := m + size
+	FI
+ENDFOR
+dst[127:m] := src[127:m]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPCOMPRESSB" form="xmm {k}, xmm" xed="VPCOMPRESSB_XMMu8_MASKmskw_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="zmm {z}, zmm, zmm" xed="VPDPWSSDS_ZMMi32_MASKmskw_ZMMi16_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="zmm {k}, zmm, zmm" xed="VPDPWSSDS_ZMMi32_MASKmskw_ZMMi16_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+	tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+	dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="zmm, zmm, zmm" xed="VPDPWSSDS_ZMMi32_MASKmskw_ZMMi16_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="ymm {z}, ymm, ymm" xed="VPDPWSSDS_YMMi32_MASKmskw_YMMi16_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="ymm {k}, ymm, ymm" xed="VPDPWSSDS_YMMi32_MASKmskw_YMMi16_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+	tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+	dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="ymm, ymm, ymm" xed="VPDPWSSDS_YMMi32_MASKmskw_YMMi16_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="xmm {z}, xmm, xmm" xed="VPDPWSSDS_XMMi32_MASKmskw_XMMi16_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="xmm {k}, xmm, xmm" xed="VPDPWSSDS_XMMi32_MASKmskw_XMMi16_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_dpwssds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+	tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+	dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPWSSDS" form="xmm, xmm, xmm" xed="VPDPWSSDS_XMMi32_MASKmskw_XMMi16_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="zmm {z}, zmm, zmm" xed="VPDPWSSD_ZMMi32_MASKmskw_ZMMi16_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="zmm {k}, zmm, zmm" xed="VPDPWSSD_ZMMi32_MASKmskw_ZMMi16_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="SI16"/>
+	<parameter type="__m512i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+	tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+	dst.dword[j] := src.dword[j] + tmp1 + tmp2
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="zmm, zmm, zmm" xed="VPDPWSSD_ZMMi32_MASKmskw_ZMMi16_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="ymm {z}, ymm, ymm" xed="VPDPWSSD_YMMi32_MASKmskw_YMMi16_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="ymm {k}, ymm, ymm" xed="VPDPWSSD_YMMi32_MASKmskw_YMMi16_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="SI16"/>
+	<parameter type="__m256i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+	tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+	dst.dword[j] := src.dword[j] + tmp1 + tmp2
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="ymm, ymm, ymm" xed="VPDPWSSD_YMMi32_MASKmskw_YMMi16_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="xmm {z}, xmm, xmm" xed="VPDPWSSD_XMMi32_MASKmskw_XMMi16_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+		tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="xmm {k}, xmm, xmm" xed="VPDPWSSD_XMMi32_MASKmskw_XMMi16_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_dpwssd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply groups of 2 adjacent pairs of signed 16-bit integers in "a" with corresponding 16-bit integers in "b", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	tmp1.dword := SignExtend32(a.word[2*j]) * SignExtend32(b.word[2*j])
+	tmp2.dword := SignExtend32(a.word[2*j+1]) * SignExtend32(b.word[2*j+1])
+	dst.dword[j] := src.dword[j] + tmp1 + tmp2
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPWSSD" form="xmm, xmm, xmm" xed="VPDPWSSD_XMMi32_MASKmskw_XMMi16_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="zmm {z}, zmm, zmm" xed="VPDPBUSDS_ZMMi32_MASKmskw_ZMMu8_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="zmm {k}, zmm, zmm" xed="VPDPBUSDS_ZMMi32_MASKmskw_ZMMu8_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+	tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+	tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+	tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+	dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="zmm, zmm, zmm" xed="VPDPBUSDS_ZMMi32_MASKmskw_ZMMu8_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="ymm {z}, ymm, ymm" xed="VPDPBUSDS_YMMi32_MASKmskw_YMMu8_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="ymm {k}, ymm, ymm" xed="VPDPBUSDS_YMMi32_MASKmskw_YMMu8_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+	tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+	tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+	tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+	dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="ymm, ymm, ymm" xed="VPDPBUSDS_YMMi32_MASKmskw_YMMu8_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="xmm {z}, xmm, xmm" xed="VPDPBUSDS_XMMi32_MASKmskw_XMMu8_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="xmm {k}, xmm, xmm" xed="VPDPBUSDS_XMMi32_MASKmskw_XMMu8_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_dpbusds_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src" using signed saturation, and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+	tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+	tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+	tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+	dst.dword[j] := Saturate32(src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPBUSDS" form="xmm, xmm, xmm" xed="VPDPBUSDS_XMMi32_MASKmskw_XMMu8_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_maskz_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="zmm {z}, zmm, zmm" xed="VPDPBUSD_ZMMi32_MASKmskw_ZMMu8_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_mask_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="zmm {k}, zmm, zmm" xed="VPDPBUSD_ZMMi32_MASKmskw_ZMMu8_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+	tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+	tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+	tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+	dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="zmm, zmm, zmm" xed="VPDPBUSD_ZMMi32_MASKmskw_ZMMu8_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_maskz_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="ymm {z}, ymm, ymm" xed="VPDPBUSD_YMMi32_MASKmskw_YMMu8_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_mask_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="ymm {k}, ymm, ymm" xed="VPDPBUSD_YMMi32_MASKmskw_YMMu8_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="SI32"/>
+	<parameter type="__m256i" varname="src" etype="SI32"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+	tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+	tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+	tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+	dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="ymm, ymm, ymm" xed="VPDPBUSD_YMMi32_MASKmskw_YMMu8_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_maskz_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+	ELSE
+		dst.dword[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="xmm {z}, xmm, xmm" xed="VPDPBUSD_XMMi32_MASKmskw_XMMu8_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_mask_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 3
+	IF k[j]
+		tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+		tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+		tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+		tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+		dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+	ELSE
+		dst.dword[j] := src.dword[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="xmm {k}, xmm, xmm" xed="VPDPBUSD_XMMi32_MASKmskw_XMMu8_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_dpbusd_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VNNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="src" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in "a" with corresponding signed 8-bit integers in "b", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in "src", and store the packed 32-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	tmp1.word := Signed(ZeroExtend16(a.byte[4*j]) * SignExtend16(b.byte[4*j]))
+	tmp2.word := Signed(ZeroExtend16(a.byte[4*j+1]) * SignExtend16(b.byte[4*j+1]))
+	tmp3.word := Signed(ZeroExtend16(a.byte[4*j+2]) * SignExtend16(b.byte[4*j+2]))
+	tmp4.word := Signed(ZeroExtend16(a.byte[4*j+3]) * SignExtend16(b.byte[4*j+3]))
+	dst.dword[j] := src.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VPDPBUSD" form="xmm, xmm, xmm" xed="VPDPBUSD_XMMi32_MASKmskw_XMMu8_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_2intersect_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VP2INTERSECT</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Mask</category>
+	<return type="void"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__mmask8*" varname="k1" etype="MASK" memwidth="8"/>
+	<parameter type="__mmask8*" varname="k2" etype="MASK" memwidth="8"/>
+	<description>Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers.</description>
+	<operation>
+MEM[k1+7:k1] := 0
+MEM[k2+7:k2] := 0
+FOR i := 0 TO 3
+	FOR j := 0 TO 3
+		match := (a.dword[i] == b.dword[j] ? 1 : 0)
+		MEM[k1+7:k1].bit[i] |= match
+		MEM[k2+7:k2].bit[j] |= match
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="VP2INTERSECTD" form="k, xmm, xmm" xed="VP2INTERSECTD_MASKmskw_XMMu32_XMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_2intersect_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VP2INTERSECT</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Mask</category>
+	<return type="void"/>
+	<parameter type="__m256i" varname="a" etype="UI32"/>
+	<parameter type="__m256i" varname="b" etype="UI32"/>
+	<parameter type="__mmask8*" varname="k1" etype="MASK" memwidth="8"/>
+	<parameter type="__mmask8*" varname="k2" etype="MASK" memwidth="8"/>
+	<description>Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers.</description>
+	<operation>
+MEM[k1+7:k1] := 0
+MEM[k2+7:k2] := 0
+FOR i := 0 TO 7
+	FOR j := 0 TO 7
+		match := (a.dword[i] == b.dword[j] ? 1 : 0)
+		MEM[k1+7:k1].bit[i] |= match
+		MEM[k2+7:k2].bit[j] |= match
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="VP2INTERSECTD" form="k, ymm, ymm" xed="VP2INTERSECTD_MASKmskw_YMMu32_YMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_2intersect_epi32">
+	<type>Integer</type>
+	<CPUID>AVX512_VP2INTERSECT</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__mmask16*" varname="k1" etype="MASK" memwidth="16"/>
+	<parameter type="__mmask16*" varname="k2" etype="MASK" memwidth="16"/>
+	<description>Compute intersection of packed 32-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers.</description>
+	<operation>
+MEM[k1+15:k1] := 0
+MEM[k2+15:k2] := 0
+FOR i := 0 TO 15
+	FOR j := 0 TO 15
+		match := (a.dword[i] == b.dword[j] ? 1 : 0)
+		MEM[k1+15:k1].bit[i] |= match
+		MEM[k2+15:k2].bit[j] |= match
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="VP2INTERSECTD" form="k, zmm, zmm" xed="VP2INTERSECTD_MASKmskw_ZMMu32_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm_2intersect_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VP2INTERSECT</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Mask</category>
+	<return type="void"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<parameter type="__mmask8*" varname="k1" etype="MASK" memwidth="8"/>
+	<parameter type="__mmask8*" varname="k2" etype="MASK" memwidth="8"/>
+	<description>Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers.</description>
+	<operation>
+MEM[k1+7:k1] := 0
+MEM[k2+7:k2] := 0
+FOR i := 0 TO 1
+	FOR j := 0 TO 1
+		match := (a.qword[i] == b.qword[j] ? 1 : 0)
+		MEM[k1+7:k1].bit[i] |= match
+		MEM[k2+7:k2].bit[j] |= match
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="VP2INTERSECTQ" form="k, xmm, xmm" xed="VP2INTERSECTQ_MASKmskw_XMMu64_XMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm256_2intersect_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VP2INTERSECT</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Mask</category>
+	<return type="void"/>
+	<parameter type="__m256i" varname="a" etype="UI64"/>
+	<parameter type="__m256i" varname="b" etype="UI64"/>
+	<parameter type="__mmask8*" varname="k1" etype="MASK" memwidth="8"/>
+	<parameter type="__mmask8*" varname="k2" etype="MASK" memwidth="8"/>
+	<description>Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers.</description>
+	<operation>
+MEM[k1+7:k1] := 0
+MEM[k2+7:k2] := 0
+FOR i := 0 TO 3
+	FOR j := 0 TO 3
+		match := (a.qword[i] == b.qword[j] ? 1 : 0)
+		MEM[k1+7:k1].bit[i] |= match
+		MEM[k2+7:k2].bit[j] |= match
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="VP2INTERSECTQ" form="k, ymm, ymm" xed="VP2INTERSECTQ_MASKmskw_YMMu64_YMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="AVX-512" name="_mm512_2intersect_epi64">
+	<type>Integer</type>
+	<CPUID>AVX512_VP2INTERSECT</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Mask</category>
+	<return type="void"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="__m512i" varname="b" etype="UI64"/>
+	<parameter type="__mmask8*" varname="k1" etype="MASK" memwidth="8"/>
+	<parameter type="__mmask8*" varname="k2" etype="MASK" memwidth="8"/>
+	<description>Compute intersection of packed 64-bit integer vectors "a" and "b", and store indication of match in the corresponding bit of two mask registers specified by "k1" and "k2". A match in corresponding elements of "a" and "b" is indicated by a set bit in the corresponding bit of the mask registers.</description>
+	<operation>
+MEM[k1+7:k1] := 0
+MEM[k2+7:k2] := 0
+FOR i := 0 TO 7
+	FOR j := 0 TO 7
+		match := (a.qword[i] == b.qword[j] ? 1 : 0)
+		MEM[k1+7:k1].bit[i] |= match
+		MEM[k2+7:k2].bit[j] |= match
+	ENDFOR
+ENDFOR
+	</operation>
+	<instruction name="VP2INTERSECTQ" form="k, zmm, zmm" xed="VP2INTERSECTQ_MASKmskw_ZMMu64_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bextr_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="start" etype="UI32"/>
+	<parameter type="unsigned int" varname="len" etype="UI32"/>
+	<description>Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start".</description>
+	<operation>
+tmp[511:0] := a
+dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]])
+	</operation>
+	<instruction name="BEXTR" form="r32, r32, r32" xed="BEXTR_VGPR32d_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bextr2_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="control" etype="UI32"/>
+	<description>Extract contiguous bits from unsigned 32-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 7:0 of "control".</description>
+	<operation>
+start := control[7:0]
+len := control[15:8]
+tmp[511:0] := a
+dst[31:0] := ZeroExtend32(tmp[(start[7:0] + len[7:0] - 1):start[7:0]])
+	</operation>
+	<instruction name="BEXTR" form="r32, r32, r32" xed="BEXTR_VGPR32d_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bextr_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="start" etype="UI32"/>
+	<parameter type="unsigned int" varname="len" etype="UI32"/>
+	<description>Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by "len", starting at the bit specified by "start".</description>
+	<operation>
+tmp[511:0] := a
+dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]])
+	</operation>
+	<instruction name="BEXTR" form="r64, r64, r64" xed="BEXTR_VGPR64q_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bextr2_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="control" etype="UI64"/>
+	<description>Extract contiguous bits from unsigned 64-bit integer "a", and store the result in "dst". Extract the number of bits specified by bits 15:8 of "control", starting at the bit specified by bits 7:0 of "control".</description>
+	<operation>
+start := control[7:0]
+len := control[15:8]
+tmp[511:0] := a
+dst[63:0] := ZeroExtend64(tmp[(start[7:0] + len[7:0] - 1):start[7:0]])
+	</operation>
+	<instruction name="BEXTR" form="r64, r64, r64" xed="BEXTR_VGPR64q_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_blsi_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Extract the lowest set bit from unsigned 32-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a".</description>
+	<operation>
+dst := (-a) AND a
+	</operation>
+	<instruction name="BLSI" form="r32, r32" xed="BLSI_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_blsi_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Extract the lowest set bit from unsigned 64-bit integer "a" and set the corresponding bit in "dst". All other bits in "dst" are zeroed, and all bits are zeroed if no bits are set in "a".</description>
+	<operation>
+dst := (-a) AND a
+	</operation>
+	<instruction name="BLSI" form="r64, r64" xed="BLSI_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_blsmsk_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 32-bit integer "a".</description>
+	<operation>
+dst := (a - 1) XOR a
+	</operation>
+	<instruction name="BLSMSK" form="r32, r32" xed="BLSMSK_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_blsmsk_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Set all the lower bits of "dst" up to and including the lowest set bit in unsigned 64-bit integer "a".</description>
+	<operation>
+dst := (a - 1) XOR a
+	</operation>
+	<instruction name="BLSMSK" form="r64, r64" xed="BLSMSK_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_blsr_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a".</description>
+	<operation>
+dst := (a - 1) AND a
+	</operation>
+	<instruction name="BLSR" form="r32, r32" xed="BLSR_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_blsr_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the bit in "dst" that corresponds to the lowest set bit in "a".</description>
+	<operation>
+dst := (a - 1) AND a
+	</operation>
+	<instruction name="BLSR" form="r64, r64" xed="BLSR_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_andn_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<description>Compute the bitwise NOT of 32-bit integer "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := ((NOT a[31:0]) AND b[31:0])
+	</operation>
+	<instruction name="ANDN" form="r32, r32, r32" xed="ANDN_VGPR32d_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_andn_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of 64-bit integer "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := ((NOT a[63:0]) AND b[63:0])
+	</operation>
+	<instruction name="ANDN" form="r64, r64, r64" xed="ANDN_VGPR64q_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_tzcnt_u32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst".</description>
+	<operation>
+tmp := 0
+dst := 0
+DO WHILE ((tmp &lt; 32) AND a[tmp] == 0)
+	tmp := tmp + 1
+	dst := dst + 1
+OD
+	</operation>
+	<instruction name="TZCNT" form="r32, r32" xed="TZCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_tzcnt_u64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst".</description>
+	<operation>
+tmp := 0
+dst := 0
+DO WHILE ((tmp &lt; 64) AND a[tmp] == 0)
+	tmp := tmp + 1
+	dst := dst + 1
+OD
+	</operation>
+	<instruction name="TZCNT" form="r64, r64" xed="TZCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_tzcnt_32">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Count the number of trailing zero bits in unsigned 32-bit integer "a", and return that count in "dst".</description>
+	<operation>
+tmp := 0
+dst := 0
+DO WHILE ((tmp &lt; 32) AND a[tmp] == 0)
+	tmp := tmp + 1
+	dst := dst + 1
+OD
+	</operation>
+	<instruction name="TZCNT" form="r32, r32" xed="TZCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_tzcnt_64">
+	<type>Integer</type>
+	<CPUID>BMI1</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Count the number of trailing zero bits in unsigned 64-bit integer "a", and return that count in "dst".</description>
+	<operation>
+tmp := 0
+dst := 0
+DO WHILE ((tmp &lt; 64) AND a[tmp] == 0)
+	tmp := tmp + 1
+	dst := dst + 1
+OD
+	</operation>
+	<instruction name="TZCNT" form="r64, r64" xed="TZCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bzhi_u32">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="index" etype="UI32"/>
+	<description>Copy all bits from unsigned 32-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index".</description>
+	<operation>
+n := index[7:0]
+dst := a
+IF (n &lt; 32)
+	dst[31:n] := 0
+FI
+	</operation>
+	<instruction name="BZHI" form="r32, r32, r32" xed="BZHI_VGPR32d_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bzhi_u64">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned int" varname="index" etype="UI32"/>
+	<description>Copy all bits from unsigned 64-bit integer "a" to "dst", and reset (set to 0) the high bits in "dst" starting at "index".</description>
+	<operation>
+n := index[7:0]
+dst := a
+IF (n &lt; 64)
+	dst[63:n] := 0
+FI
+	</operation>
+	<instruction name="BZHI" form="r64, r64, r64" xed="BZHI_VGPR64q_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_pdep_u32">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="mask" etype="UI32"/>
+	<description>Deposit contiguous low bits from unsigned 32-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero.</description>
+	<operation>
+tmp := a
+dst := 0
+m := 0
+k := 0
+DO WHILE m &lt; 32
+	IF mask[m] == 1
+		dst[m] := tmp[k]
+		k := k + 1
+	FI
+	m := m + 1
+OD
+	</operation>
+	<instruction name="PDEP" form="r32, r32, r32" xed="PDEP_VGPR32d_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_pdep_u64">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="mask" etype="UI64"/>
+	<description>Deposit contiguous low bits from unsigned 64-bit integer "a" to "dst" at the corresponding bit locations specified by "mask"; all other bits in "dst" are set to zero.</description>
+	<operation>
+tmp := a
+dst := 0
+m := 0
+k := 0
+DO WHILE m &lt; 64
+	IF mask[m] == 1
+		dst[m] := tmp[k]
+		k := k + 1
+	FI
+	m := m + 1
+OD
+	</operation>
+	<instruction name="PDEP" form="r64, r64, r64" xed="PDEP_VGPR64q_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_pext_u32">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="mask" etype="UI32"/>
+	<description>Extract bits from unsigned 32-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero.</description>
+	<operation>
+tmp := a
+dst := 0
+m := 0
+k := 0
+DO WHILE m &lt; 32
+	IF mask[m] == 1
+		dst[k] := tmp[m]
+		k := k + 1
+	FI
+	m := m + 1
+OD
+	</operation>
+	<instruction name="PEXT" form="r32, r32, r32" xed="PEXT_VGPR32d_VGPR32d_VGPR32d"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_pext_u64">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="mask" etype="UI64"/>
+	<description>Extract bits from unsigned 64-bit integer "a" at the corresponding bit locations specified by "mask" to contiguous low bits in "dst"; the remaining upper bits in "dst" are set to zero.</description>
+	<operation>
+tmp := a
+dst := 0
+m := 0
+k := 0
+DO WHILE m &lt; 64
+	IF mask[m] == 1
+		dst[k] := tmp[m]
+		k := k + 1
+	FI
+	m := m + 1
+OD
+	</operation>
+	<instruction name="PEXT" form="r64, r64, r64" xed="PEXT_VGPR64q_VGPR64q_VGPR64q"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mulx_u32">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Arithmetic</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<parameter type="unsigned int*" varname="hi" etype="UI32" memwidth="32"/>
+	<description>Multiply unsigned 32-bit integers "a" and "b", store the low 32-bits of the result in "dst", and store the high 32-bits in "hi". This does not read or write arithmetic flags.</description>
+	<operation>
+dst[31:0] := (a * b)[31:0]
+MEM[hi+31:hi] := (a * b)[63:32]
+	</operation>
+	<instruction name="MULX" form="r32, r32, m32" xed="MULX_VGPR32d_VGPR32d_MEMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mulx_u64">
+	<type>Integer</type>
+	<CPUID>BMI2</CPUID>
+	<category>Arithmetic</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<parameter type="unsigned __int64*" varname="hi" etype="UI64" memwidth="64"/>
+	<description>Multiply unsigned 64-bit integers "a" and "b", store the low 64-bits of the result in "dst", and store the high 64-bits in "hi". This does not read or write arithmetic flags.</description>
+	<operation>
+dst[63:0] := (a * b)[63:0]
+MEM[hi+63:hi] := (a * b)[127:64]
+	</operation>
+	<instruction name="MULX" form="r64, r64, m64" xed="MULX_VGPR64q_VGPR64q_MEMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_incsspd">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a".</description>
+	<operation>
+SSP := SSP + a[7:0] * 4
+	</operation>
+	<instruction name="INCSSPD" form="r32" xed="INCSSPD_GPR32u8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_incsspq">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Increment the shadow stack pointer by 8 times the value specified in bits [7:0] of "a".</description>
+	<operation>
+SSP := SSP + a[7:0] * 8
+	</operation>
+	<instruction name="INCSSPQ" form="r64" xed="INCSSPQ_GPR64u8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdsspd_i32">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__int32" varname="dst" etype="UI32"/>
+	<parameter type="void"/>
+	<description>Read the low 32-bits of the current shadow stack pointer, and store the result in "dst".</description>
+	<operation>dst := SSP[31:0]
+	</operation>
+	<instruction name="RDSSPD" form="r32" xed="RDSSPD_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdsspq_i64">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="void"/>
+	<description>Read the current shadow stack pointer, and store the result in "dst".</description>
+	<operation>dst := SSP[63:0]
+	</operation>
+	<instruction name="RDSSPQ" form="r64" xed="RDSSPQ_GPR64u64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_saveprevssp">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Save the previous shadow stack pointer context.</description>
+	<instruction name="SAVEPREVSSP" xed="SAVEPREVSSP"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rstorssp">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void *" varname="p"/>
+	<description>Restore the saved shadow stack pointer from the shadow stack restore token previously created on the shadow stack by SAVEPREVSSP.</description>
+	<instruction name="RSTORSSP" form="m64" xed="RSTORSSP_MEMu64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_wrssd">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="__int32" varname="val" etype="UI32"/>
+	<parameter type="void *" varname="p"/>
+	<description>Write 32-bit value in "val" to a shadow stack page in memory specified by "p".</description>
+	<instruction name="WRSSD" form="m32, r32" xed="WRSSD_MEMu32_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_wrssq">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="__int64" varname="val" etype="UI64"/>
+	<parameter type="void *" varname="p"/>
+	<description>Write 64-bit value in "val" to a shadow stack page in memory specified by "p".</description>
+	<instruction name="WRSSQ" form="m64, r64" xed="WRSSQ_MEMu64_GPR64u64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_wrussd">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="__int32" varname="val" etype="UI32"/>
+	<parameter type="void *" varname="p"/>
+	<description>Write 32-bit value in "val" to a user shadow stack page in memory specified by "p".</description>
+	<instruction name="WRUSSD" form="m32, r32" xed="WRUSSD_MEMu32_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_wrussq">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="__int64" varname="val" etype="UI64"/>
+	<parameter type="void *" varname="p"/>
+	<description>Write 64-bit value in "val" to a user shadow stack page in memory specified by "p".</description>
+	<instruction name="WRUSSQ" form="m64, r64" xed="WRUSSQ_MEMu64_GPR64u64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_setssbsy">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Mark the shadow stack pointed to by IA32_PL0_SSP as busy.</description>
+	<instruction name="SETSSBSY" xed="SETSSBSY"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_clrssbsy">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void *" varname="p"/>
+	<description>Mark the shadow stack pointed to by "p" as not busy.</description>
+	<instruction name="CLRSSBSY" form="m64" xed="CLRSSBSY_MEMu64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_get_ssp">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__int32" varname="dst" etype="UI32"/>
+	<parameter type="void"/>
+	<description>If CET is enabled, read the low 32-bits of the current shadow stack pointer, and store the result in "dst". Otherwise return 0.</description>
+	<operation>dst := SSP[31:0]
+	</operation>
+	<instruction name="RDSSPD" form="r32" xed="RDSSPD_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_get_ssp">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="void"/>
+	<description>If CET is enabled, read the current shadow stack pointer, and store the result in "dst". Otherwise return 0.</description>
+	<operation>dst := SSP[63:0]
+	</operation>
+	<instruction name="RDSSPQ" form="r64" xed="RDSSPQ_GPR64u64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_inc_ssp">
+	<CPUID>CET_SS</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Increment the shadow stack pointer by 4 times the value specified in bits [7:0] of "a".</description>
+	<operation>
+SSP := SSP + a[7:0] * 4
+	</operation>
+	<instruction name="INCSSPD" form="r32" xed="INCSSPD_GPR32u8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_cldemote">
+	<CPUID>CLDEMOTE</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void const *" varname="p"/>
+	<description>Hint to hardware that the cache line that contains "p" should be demoted from the cache closest to the processor core to a level more distant from the processor core.</description>
+	<instruction name="CLDEMOTE" form="m8" xed="CLDEMOTE_MEMu8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_clflushopt">
+	<CPUID>CLFLUSHOPT</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void const *" varname="p"/>
+	<description>Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy.</description>
+	<instruction name="CLFLUSHOPT" form="m8" xed="CLFLUSHOPT_MEMmprefetch"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_clwb">
+	<CPUID>CLWB</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void const *" varname="p"/>
+	<description>Write back to memory the cache line that contains "p" from any level of the cache hierarchy in the cache coherence domain.</description>
+	<instruction name="CLWB" form="m8" xed="CLWB_MEMmprefetch"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="xmm, xmm, xmm" xed="VFMADD132PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADD213PD" form="xmm, xmm, xmm" xed="VFMADD213PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADD231PD" form="xmm, xmm, xmm" xed="VFMADD231PD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PD" form="ymm, ymm, ymm" xed="VFMADD132PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADD213PD" form="ymm, ymm, ymm" xed="VFMADD213PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADD231PD" form="ymm, ymm, ymm" xed="VFMADD231PD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="xmm, xmm, xmm" xed="VFMADD132PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADD213PS" form="xmm, xmm, xmm" xed="VFMADD213PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADD231PS" form="xmm, xmm, xmm" xed="VFMADD231PS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADD132PS" form="ymm, ymm, ymm" xed="VFMADD132PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADD213PS" form="ymm, ymm, ymm" xed="VFMADD213PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADD231PS" form="ymm, ymm, ymm" xed="VFMADD231PS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SD" form="xmm, xmm, xmm" xed="VFMADD132SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFMADD213SD" form="xmm, xmm, xmm" xed="VFMADD213SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFMADD231SD" form="xmm, xmm, xmm" xed="VFMADD231SD_XMMdq_XMMq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADD132SS" form="xmm, xmm, xmm" xed="VFMADD132SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFMADD213SS" form="xmm, xmm, xmm" xed="VFMADD213SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFMADD231SS" form="xmm, xmm, xmm" xed="VFMADD231SS_XMMdq_XMMd_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF ((j &amp; 1) == 0) 
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="xmm, xmm, xmm" xed="VFMADDSUB132PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADDSUB213PD" form="xmm, xmm, xmm" xed="VFMADDSUB213PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADDSUB231PD" form="xmm, xmm, xmm" xed="VFMADDSUB231PD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmaddsub_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF ((j &amp; 1) == 0) 
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PD" form="ymm, ymm, ymm" xed="VFMADDSUB132PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADDSUB213PD" form="ymm, ymm, ymm" xed="VFMADDSUB213PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADDSUB231PD" form="ymm, ymm, ymm" xed="VFMADDSUB231PD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF ((j &amp; 1) == 0) 
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="xmm, xmm, xmm" xed="VFMADDSUB132PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADDSUB213PS" form="xmm, xmm, xmm" xed="VFMADDSUB213PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMADDSUB231PS" form="xmm, xmm, xmm" xed="VFMADDSUB231PS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmaddsub_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately add and subtract packed elements in "c" to/from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF ((j &amp; 1) == 0) 
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMADDSUB132PS" form="ymm, ymm, ymm" xed="VFMADDSUB132PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADDSUB213PS" form="ymm, ymm, ymm" xed="VFMADDSUB213PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMADDSUB231PS" form="ymm, ymm, ymm" xed="VFMADDSUB231PS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="xmm, xmm, xmm" xed="VFMSUB132PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUB213PD" form="xmm, xmm, xmm" xed="VFMSUB213PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUB231PD" form="xmm, xmm, xmm" xed="VFMSUB231PD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PD" form="ymm, ymm, ymm" xed="VFMSUB132PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUB213PD" form="ymm, ymm, ymm" xed="VFMSUB213PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUB231PD" form="ymm, ymm, ymm" xed="VFMSUB231PD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="xmm, xmm, xmm" xed="VFMSUB132PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUB213PS" form="xmm, xmm, xmm" xed="VFMSUB213PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUB231PS" form="xmm, xmm, xmm" xed="VFMSUB231PS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUB132PS" form="ymm, ymm, ymm" xed="VFMSUB132PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUB213PS" form="ymm, ymm, ymm" xed="VFMSUB213PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUB231PS" form="ymm, ymm, ymm" xed="VFMSUB231PS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SD" form="xmm, xmm, xmm" xed="VFMSUB132SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFMSUB213SD" form="xmm, xmm, xmm" xed="VFMSUB213SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFMSUB231SD" form="xmm, xmm, xmm" xed="VFMSUB231SD_XMMdq_XMMq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUB132SS" form="xmm, xmm, xmm" xed="VFMSUB132SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFMSUB213SS" form="xmm, xmm, xmm" xed="VFMSUB213SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFMSUB231SS" form="xmm, xmm, xmm" xed="VFMSUB231SS_XMMdq_XMMd_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF ((j &amp; 1) == 0) 
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="xmm, xmm, xmm" xed="VFMSUBADD132PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUBADD213PD" form="xmm, xmm, xmm" xed="VFMSUBADD213PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUBADD231PD" form="xmm, xmm, xmm" xed="VFMSUBADD231PD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmsubadd_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	IF ((j &amp; 1) == 0) 
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
+	ELSE
+		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PD" form="ymm, ymm, ymm" xed="VFMSUBADD132PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUBADD213PD" form="ymm, ymm, ymm" xed="VFMSUBADD213PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUBADD231PD" form="ymm, ymm, ymm" xed="VFMSUBADD231PD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF ((j &amp; 1) == 0) 
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="xmm, xmm, xmm" xed="VFMSUBADD132PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUBADD213PS" form="xmm, xmm, xmm" xed="VFMSUBADD213PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFMSUBADD231PS" form="xmm, xmm, xmm" xed="VFMSUBADD231PS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fmsubadd_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", alternately subtract and add packed elements in "c" from/to the intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	IF ((j &amp; 1) == 0) 
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFMSUBADD132PS" form="ymm, ymm, ymm" xed="VFMSUBADD132PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUBADD213PS" form="ymm, ymm, ymm" xed="VFMSUBADD213PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFMSUBADD231PS" form="ymm, ymm, ymm" xed="VFMSUBADD231PS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="xmm, xmm, xmm" xed="VFNMADD132PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMADD213PD" form="xmm, xmm, xmm" xed="VFNMADD213PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMADD231PD" form="xmm, xmm, xmm" xed="VFNMADD231PD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fnmadd_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PD" form="ymm, ymm, ymm" xed="VFNMADD132PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMADD213PD" form="ymm, ymm, ymm" xed="VFNMADD213PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMADD231PD" form="ymm, ymm, ymm" xed="VFNMADD231PD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="xmm, xmm, xmm" xed="VFNMADD132PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMADD213PS" form="xmm, xmm, xmm" xed="VFNMADD213PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMADD231PS" form="xmm, xmm, xmm" xed="VFNMADD231PS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fnmadd_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", add the negated intermediate result to packed elements in "c", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMADD132PS" form="ymm, ymm, ymm" xed="VFNMADD132PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMADD213PS" form="ymm, ymm, ymm" xed="VFNMADD213PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMADD231PS" form="ymm, ymm, ymm" xed="VFNMADD231PS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmadd_sd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SD" form="xmm, xmm, xmm" xed="VFNMADD132SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFNMADD213SD" form="xmm, xmm, xmm" xed="VFNMADD213SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFNMADD231SD" form="xmm, xmm, xmm" xed="VFNMADD231SD_XMMdq_XMMq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmadd_ss">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and add the negated intermediate result to the lower element in "c". Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMADD132SS" form="xmm, xmm, xmm" xed="VFNMADD132SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFNMADD213SS" form="xmm, xmm, xmm" xed="VFNMADD213SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFNMADD231SS" form="xmm, xmm, xmm" xed="VFNMADD231SS_XMMdq_XMMd_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="xmm, xmm, xmm" xed="VFNMSUB132PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMSUB213PD" form="xmm, xmm, xmm" xed="VFNMSUB213PD_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMSUB231PD" form="xmm, xmm, xmm" xed="VFNMSUB231PD_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fnmsub_pd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256d" varname="dst" etype="FP64"/>
+	<parameter type="__m256d" varname="a" etype="FP64"/>
+	<parameter type="__m256d" varname="b" etype="FP64"/>
+	<parameter type="__m256d" varname="c" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*64
+	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PD" form="ymm, ymm, ymm" xed="VFNMSUB132PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMSUB213PD" form="ymm, ymm, ymm" xed="VFNMSUB213PD_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMSUB231PD" form="ymm, ymm, ymm" xed="VFNMSUB231PD_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR	
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="xmm, xmm, xmm" xed="VFNMSUB132PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMSUB213PS" form="xmm, xmm, xmm" xed="VFNMSUB213PS_XMMdq_XMMdq_XMMdq"/>
+	<instruction name="VFNMSUB231PS" form="xmm, xmm, xmm" xed="VFNMSUB231PS_XMMdq_XMMdq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm256_fnmsub_ps">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="__m256" varname="b" etype="FP32"/>
+	<parameter type="__m256" varname="c" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", subtract packed elements in "c" from the negated intermediate result, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
+ENDFOR	
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VFNMSUB132PS" form="ymm, ymm, ymm" xed="VFNMSUB132PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMSUB213PS" form="ymm, ymm, ymm" xed="VFNMSUB213PS_YMMqq_YMMqq_YMMqq"/>
+	<instruction name="VFNMSUB231PS" form="ymm, ymm, ymm" xed="VFNMSUB231PS_YMMqq_YMMqq_YMMqq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmsub_sd">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="c" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SD" form="xmm, xmm, xmm" xed="VFNMSUB132SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFNMSUB213SD" form="xmm, xmm, xmm" xed="VFNMSUB213SD_XMMdq_XMMq_XMMq"/>
+	<instruction name="VFNMSUB231SD" form="xmm, xmm, xmm" xed="VFNMSUB231SD_XMMdq_XMMq_XMMq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="FMA" name="_mm_fnmsub_ss">
+	<type>Floating Point</type>
+	<CPUID>FMA</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="c" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point elements in "a" and "b", and subtract the lower element in "c" from the negated intermediate result. Store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VFNMSUB132SS" form="xmm, xmm, xmm" xed="VFNMSUB132SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFNMSUB213SS" form="xmm, xmm, xmm" xed="VFNMSUB213SS_XMMdq_XMMd_XMMd"/>
+	<instruction name="VFNMSUB231SS" form="xmm, xmm, xmm" xed="VFNMSUB231SS_XMMdq_XMMd_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>FP16C</CPUID>
+	<category>Convert</category>
+	<return type="__m256" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*16
+	dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="ymm, xmm" xed="VCVTPH2PS_YMMqq_XMMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>FP16C</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="FP16"/>
+	<parameter type="__m256" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst".
+	[sae_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	l := 32*j
+	dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm, ymm, imm8" xed="VCVTPS2PH_XMMdq_YMMqq_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_cvtph_ps">
+	<type>Floating Point</type>
+	<CPUID>FP16C</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="FP16"/>
+	<description>Convert packed half-precision (16-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	m := j*16
+	dst[i+31:i] := Convert_FP16_To_FP32(a[m+15:m])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VCVTPH2PS" form="xmm, xmm" xed="VCVTPH2PS_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_cvtps_ph">
+	<type>Floating Point</type>
+	<CPUID>FP16C</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="FP16"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="sae" etype="IMM" hint="TRUE" immtype="_MM_FROUND_SAE"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed half-precision (16-bit) floating-point elements, and store the results in "dst".
+	[sae_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := 16*j
+	l := 32*j
+	dst[i+15:i] := Convert_FP32_To_FP16(a[l+31:l])
+ENDFOR
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="VCVTPS2PH" form="xmm, xmm, imm8" xed="VCVTPS2PH_XMMq_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_readfsbase_u32">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<description>Read the FS segment base register and store the 32-bit result in "dst".</description>
+	<operation>dst[31:0] := FS_Segment_Base_Register
+dst[63:32] := 0
+	</operation>
+	<instruction name="RDFSBASE" form="r32" xed="RDFSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_readfsbase_u64">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<description>Read the FS segment base register and store the 64-bit result in "dst".</description>
+	<operation>dst[63:0] := FS_Segment_Base_Register
+	</operation>
+	<instruction name="RDFSBASE" form="r64" xed="RDFSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_readgsbase_u32">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<description>Read the GS segment base register and store the 32-bit result in "dst".</description>
+	<operation>dst[31:0] := GS_Segment_Base_Register
+dst[63:32] := 0
+	</operation>
+	<instruction name="RDGSBASE" form="r32" xed="RDGSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_readgsbase_u64">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<description>Read the GS segment base register and store the 64-bit result in "dst".</description>
+	<operation>dst[63:0] := GS_Segment_Base_Register
+	</operation>
+	<instruction name="RDGSBASE" form="r64" xed="RDGSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_writefsbase_u32">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Write the unsigned 32-bit integer "a" to the FS segment base register.</description>
+	<operation>
+FS_Segment_Base_Register[31:0] := a[31:0]
+FS_Segment_Base_Register[63:32] := 0
+	</operation>
+	<instruction name="WRFSBASE" form="r32" xed="WRFSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_writefsbase_u64">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Write the unsigned 64-bit integer "a" to the FS segment base register.</description>
+	<operation>
+FS_Segment_Base_Register[63:0] := a[63:0]
+	</operation>
+	<instruction name="WRFSBASE" form="r64" xed="WRFSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_writegsbase_u32">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Write the unsigned 32-bit integer "a" to the GS segment base register.</description>
+	<operation>
+GS_Segment_Base_Register[31:0] := a[31:0]
+GS_Segment_Base_Register[63:32] := 0
+	</operation>
+	<instruction name="WRGSBASE" form="r32" xed="WRGSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_writegsbase_u64">
+	<type>Integer</type>
+	<CPUID>FSGSBASE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Write the unsigned 64-bit integer "a" to the GS segment base register.</description>
+	<operation>
+GS_Segment_Base_Register[63:0] := a[63:0]
+	</operation>
+	<instruction name="WRGSBASE" form="r64" xed="WRGSBASE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_fxrstor">
+	<CPUID>FXSR</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr" memwidth="4096"/>
+	<description>Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary.</description>
+	<operation>state_x87_fpu_mmx_sse := fxrstor(MEM[mem_addr+512*8:mem_addr])
+	</operation>
+	<instruction name="FXRSTOR" form="m512" xed="FXRSTOR_MEMmfpxenv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_fxrstor64">
+	<CPUID>FXSR</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr" memwidth="4096"/>
+	<description>Reload the x87 FPU, MMX technology, XMM, and MXCSR registers from the 512-byte memory image at "mem_addr". This data should have been written to memory previously using the FXSAVE64 instruction, and in the same format as required by the operating mode. "mem_addr" must be aligned on a 16-byte boundary.</description>
+	<operation>state_x87_fpu_mmx_sse := fxrstor64(MEM[mem_addr+512*8:mem_addr])
+	</operation>
+	<instruction name="FXRSTOR64" form="m512" xed="FXRSTOR64_MEMmfpxenv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_fxsave">
+	<CPUID>FXSR</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr" memwidth="4096"/>
+	<description>Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor.</description>
+	<operation>MEM[mem_addr+512*8:mem_addr] := fxsave(state_x87_fpu_mmx_sse)
+	</operation>
+	<instruction name="FXSAVE" form="m512" xed="FXSAVE_MEMmfpxenv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_fxsave64">
+	<CPUID>FXSR</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr" memwidth="4096"/>
+	<description>Save the current state of the x87 FPU, MMX technology, XMM, and MXCSR registers to a 512-byte memory location at "mem_addr". The layout of the 512-byte region depends on the operating mode. Bytes [511:464] are available for software use and will not be overwritten by the processor.</description>
+	<operation>MEM[mem_addr+512*8:mem_addr] := fxsave64(state_x87_fpu_mmx_sse)
+	</operation>
+	<instruction name="FXSAVE64" form="m512" xed="FXSAVE64_MEMmfpxenv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_maskz_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 63
+	IF k[j]
+		dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+	ELSE
+		dst.byte[j] := 0
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="zmm {z}, zmm, zmm" xed="VGF2P8MULB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_mask_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="src" etype="UI8"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 63
+	IF k[j]
+		dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+	ELSE
+		dst.byte[j] := src.byte[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="zmm {k}, zmm, zmm" xed="VGF2P8MULB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI8"/>
+	<parameter type="__m512i" varname="a" etype="UI8"/>
+	<parameter type="__m512i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 63
+	dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="zmm, zmm, zmm" xed="VGF2P8MULB_ZMMu8_MASKmskw_ZMMu8_ZMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_maskz_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 31
+	IF k[j]
+		dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+	ELSE
+		dst.byte[j] := 0
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="ymm {z}, ymm, ymm" xed="VGF2P8MULB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_mask_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="src" etype="UI8"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 31
+	IF k[j]
+		dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+	ELSE
+		dst.byte[j] := src.byte[j]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="ymm {k}, ymm, ymm" xed="VGF2P8MULB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI8"/>
+	<parameter type="__m256i" varname="a" etype="UI8"/>
+	<parameter type="__m256i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 31
+	dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="ymm, ymm, ymm" xed="VGF2P8MULB_YMMu8_MASKmskw_YMMu8_YMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_maskz_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 15
+	IF k[j]
+		dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+	ELSE
+		dst.byte[j] := 0
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="xmm {z}, xmm, xmm" xed="VGF2P8MULB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_mask_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="src" etype="UI8"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 15
+	IF k[j]
+		dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+	ELSE
+		dst.byte[j] := src.byte[j]
+	FI
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="xmm {k}, xmm, xmm" xed="VGF2P8MULB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_gf2p8mul_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Multiply the packed 8-bit integers in "a" and "b" in the finite field GF(2^8), and store the results in "dst". The field GF(2^8) is represented in polynomial representation with the reduction polynomial x^8 + x^4 + x^3 + x + 1.</description>
+	<operation>
+DEFINE gf2p8mul_byte(src1byte, src2byte) {
+	tword := 0
+	FOR i := 0 to 7
+		IF src2byte.bit[i]
+			tword := tword XOR (src1byte &lt;&lt; i)
+		FI
+	ENDFOR
+	FOR i := 14 downto 8
+		p := 0x11B &lt;&lt; (i-8)
+		IF tword.bit[i]
+			tword := tword XOR p
+		FI
+	ENDFOR
+	RETURN tword.byte[0]
+}
+FOR j := 0 TO 15
+	dst.byte[j] := gf2p8mul_byte(a.byte[j], b.byte[j])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8MULB" form="xmm, xmm, xmm" xed="VGF2P8MULB_XMMu8_MASKmskw_XMMu8_XMMu8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_maskz_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="x" etype="UI64"/>
+	<parameter type="__m512i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 7
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="zmm {z}, zmm, zmm, imm8" xed="VGF2P8AFFINEQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_mask_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="x" etype="UI64"/>
+	<parameter type="__m512i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 7
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := src.qword[j].byte[i]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="zmm {k}, zmm, zmm, imm8" xed="VGF2P8AFFINEQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="x" etype="UI64"/>
+	<parameter type="__m512i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst".</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 7
+	FOR i := 0 to 7
+		dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="zmm, zmm, zmm, imm8" xed="VGF2P8AFFINEQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_maskz_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="x" etype="UI64"/>
+	<parameter type="__m256i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 3
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="ymm {z}, ymm, ymm, imm8" xed="VGF2P8AFFINEQB_YMMu8_MASKmskw_YMMu8_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_mask_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="x" etype="UI64"/>
+	<parameter type="__m256i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 3
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := src.qword[j].byte[i]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="ymm {k}, ymm, ymm, imm8" xed="VGF2P8AFFINEQB_YMMu8_MASKmskw_YMMu8_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="x" etype="UI64"/>
+	<parameter type="__m256i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst".</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 3
+	FOR i := 0 to 7
+		dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="ymm, ymm, ymm, imm8" xed="VGF2P8AFFINEQB_YMMu8_MASKmskw_YMMu8_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_maskz_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="x" etype="UI64"/>
+	<parameter type="__m128i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 1
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="xmm {z}, xmm, xmm, imm8" xed="VGF2P8AFFINEQB_XMMu8_MASKmskw_XMMu8_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_mask_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="x" etype="UI64"/>
+	<parameter type="__m128i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 1
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := src.qword[j].byte[i]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="xmm {k}, xmm, xmm, imm8" xed="VGF2P8AFFINEQB_XMMu8_MASKmskw_XMMu8_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_gf2p8affine_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="x" etype="UI64"/>
+	<parameter type="__m128i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. Store the packed 8-bit results in "dst".</description>
+	<operation>
+DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND src1byte) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 1
+	FOR i := 0 to 7
+		dst.qword[j].byte[i] := affine_byte(A.qword[j], x.qword[j].byte[i], b)
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEQB" form="xmm, xmm, xmm, imm8" xed="VGF2P8AFFINEQB_XMMu8_MASKmskw_XMMu8_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_maskz_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="x" etype="UI64"/>
+	<parameter type="__m512i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 7
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="zmm {z}, zmm, zmm, imm8" xed="VGF2P8AFFINEINVQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_mask_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask64" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="x" etype="UI64"/>
+	<parameter type="__m512i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 7
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := src.qword[j].byte[i]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="zmm {k}, zmm, zmm, imm8" xed="VGF2P8AFFINEINVQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512F</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="x" etype="UI64"/>
+	<parameter type="__m512i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst".</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 7
+	FOR i := 0 to 7
+		dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+	ENDFOR
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="zmm, zmm, zmm, imm8" xed="VGF2P8AFFINEINVQB_ZMMu8_MASKmskw_ZMMu8_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_maskz_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="x" etype="UI64"/>
+	<parameter type="__m256i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 3
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="ymm {z}, ymm, ymm, imm8" xed="VGF2P8AFFINEINVQB_YMMu8_MASKmskw_YMMu8_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_mask_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="src" etype="UI64"/>
+	<parameter type="__mmask32" varname="k" etype="MASK"/>
+	<parameter type="__m256i" varname="x" etype="UI64"/>
+	<parameter type="__m256i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 3
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := src.qword[j].byte[i]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="ymm {k}, ymm, ymm, imm8" xed="VGF2P8AFFINEINVQB_YMMu8_MASKmskw_YMMu8_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m256i" varname="dst" etype="UI64"/>
+	<parameter type="__m256i" varname="x" etype="UI64"/>
+	<parameter type="__m256i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst".</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 3
+	FOR i := 0 to 7
+		dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+	ENDFOR
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="ymm, ymm, ymm, imm8" xed="VGF2P8AFFINEINVQB_YMMu8_MASKmskw_YMMu8_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_maskz_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="x" etype="UI64"/>
+	<parameter type="__m128i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using zeromask "k" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 1
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := 0
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="xmm {z}, xmm, xmm, imm8" xed="VGF2P8AFFINEINVQB_XMMu8_MASKmskw_XMMu8_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_mask_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="src" etype="UI64"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m128i" varname="x" etype="UI64"/>
+	<parameter type="__m128i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 1
+	FOR i := 0 to 7
+		IF k[j*8+i]
+			dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+		ELSE
+			dst.qword[j].byte[i] := src.qword[j].byte[i]
+		FI
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="xmm {k}, xmm, xmm, imm8" xed="VGF2P8AFFINEINVQB_XMMu8_MASKmskw_XMMu8_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_gf2p8affineinv_epi64_epi8">
+	<type>Integer</type>
+	<CPUID>GFNI</CPUID>
+	<CPUID>AVX512VL</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="x" etype="UI64"/>
+	<parameter type="__m128i" varname="A" etype="UI64"/>
+	<parameter type="int" varname="b" etype="IMM" immwidth="8"/>
+	<description>Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by "A" * "x" + "b", where "A" represents an 8 by 8 bit matrix, "x" represents an 8-bit vector, and "b" is a constant immediate byte. The inverse of the 8-bit values in "x" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in "dst".</description>
+	<operation>DEFINE parity(x) {
+	t := 0
+	FOR i := 0 to 7
+		t := t XOR x.bit[i]
+	ENDFOR
+	RETURN t
+}
+DEFINE affine_inverse_byte(tsrc2qw, src1byte, imm8) {
+	FOR i := 0 to 7
+		retbyte.bit[i] := parity(tsrc2qw.byte[7-i] AND inverse(src1byte)) XOR imm8.bit[i]
+	ENDFOR
+	RETURN retbyte
+}
+FOR j := 0 TO 1
+	FOR i := 0 to 7
+		dst.qword[j].byte[i] := affine_inverse_byte(A.qword[j], x.qword[j].byte[i], b)
+	ENDFOR
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="VGF2P8AFFINEINVQB" form="xmm, xmm, xmm, imm8" xed="VGF2P8AFFINEINVQB_XMMu8_MASKmskw_XMMu8_XMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_invpcid">
+	<CPUID>INVPCID</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="type" etype="UI32"/>
+	<parameter type="void*" varname="descriptor" memwidth="128"/>
+	<description>Invalidate mappings in the Translation Lookaside Buffers (TLBs) and paging-structure caches for the processor context identifier (PCID) specified by "descriptor" based on the invalidation type specified in "type". 
+	The PCID "descriptor" is specified as a 16-byte memory operand (with no alignment restrictions) where bits [11:0] specify the PCID, and bits [127:64] specify the linear address; bits [63:12] are reserved.
+	The types supported are:
+		0) Individual-address invalidation: If "type" is 0, the logical processor invalidates mappings for a single linear address that are tagged with the PCID specified in "descriptor", except global translations. The instruction may also invalidate global translations, mappings for other linear addresses, or mappings tagged with other PCIDs.
+		1) Single-context invalidation: If "type" is 1, the logical processor invalidates all mappings tagged with the PCID specified in "descriptor" except global translations. In some cases, it may invalidate mappings for other PCIDs as well.
+		2) All-context invalidation: If "type" is 2, the logical processor invalidates all mappings tagged with any PCID.
+		3) All-context invalidation, retaining global translations: If "type" is 3, the logical processor invalidates all mappings tagged with any PCID except global translations, ignoring "descriptor". The instruction may also invalidate global translations.</description>
+	<operation>
+CASE type[1:0] OF
+0: // individual-address invalidation retaining global translations
+	OP_PCID := MEM[descriptor+11:descriptor]
+	ADDR := MEM[descriptor+127:descriptor+64]
+	BREAK
+1: // single PCID invalidation retaining globals
+	OP_PCID := MEM[descriptor+11:descriptor]
+	// invalidate all mappings tagged with OP_PCID except global translations
+	BREAK
+2: // all PCID invalidation
+	// invalidate all mappings tagged with any PCID
+	BREAK
+3: // all PCID invalidation retaining global translations
+	// invalidate all mappings tagged with any PCID except global translations
+	BREAK
+ESAC
+	</operation>
+	<instruction name="INVPCID" form="r32, m128" xed="INVPCID_GPR32_MEMdq"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_prefetch">
+	<CPUID>KNCNI</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="char const*" varname="p" etype="UI8"/>
+	<parameter type="int" varname="i" etype="IMM" immwidth="2"/>
+	<description>Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i".</description>
+	<instruction name="VPREFETCH0" form="m8"/>
+	<instruction name="VPREFETCH1" form="m8"/>
+	<instruction name="VPREFETCH2" form="m8"/>
+	<instruction name="VPREFETCHNTA" form="m8"/>
+	<instruction name="VPREFETCHE0" form="m8"/>
+	<instruction name="VPREFETCHE1" form="m8"/>
+	<instruction name="VPREFETCHE2" form="m8"/>
+	<instruction name="VPREFETCHENTA" form="m8"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kandn">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit masks "a" and then AND with "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := (NOT a[15:0]) AND b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KANDN" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kand">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise AND of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] AND b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KAND" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kmov">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Copy 16-bit mask "a" to "k".</description>
+	<operation>
+k[15:0] := a[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KMOV" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_knot">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<description>Compute the bitwise NOT of 16-bit mask "a", and store the result in "k".</description>
+	<operation>
+k[15:0] := NOT a[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KNOT" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kor">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise OR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] OR b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KOR" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kxnor">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XNOR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := NOT (a[15:0] XOR b[15:0])
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KXNOR" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kxor">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="a" etype="MASK"/>
+	<parameter type="__mmask16" varname="b" etype="MASK"/>
+	<description>Compute the bitwise XOR of 16-bit masks "a" and "b", and store the result in "k".</description>
+	<operation>
+k[15:0] := a[15:0] XOR b[15:0]
+k[MAX:16] := 0
+	</operation>
+	<instruction name="KXOR" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPLTD" form="k, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_cmplt_epi32_mask">
+	<type>Integer</type>
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Compare</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="SI32"/>
+	<parameter type="__m512i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in mask vector "k" using zeromask "k1" (elements are zeroed out when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k[j] := ( a[i+31:i] &lt; b[i+31:i] ) ? 1 : 0
+	ELSE 
+		k[j] := 0
+	FI
+ENDFOR
+k[MAX:16] := 0
+	</operation>
+	<instruction name="VPCMPLTD" form="k {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extload_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="void const *" varname="mt" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="_MM_BROADCAST32_ENUM" varname="bc" etype="UI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 16 elements of type and size determined by "conv" from memory address "mt" and converts all elements to single-precision (32-bit) floating-point elements, storing the results in "dst". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	CASE bc OF
+	_MM_BROADCAST32_NONE:
+		CASE conv OF
+		_MM_UPCONV_PS_NONE:
+			n	 := j*32
+			dst[i+31:i] := addr[n+31:n]
+		_MM_UPCONV_PS_FLOAT16:
+			n	 := j*16
+			dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
+		_MM_UPCONV_PS_UINT8:
+			n	 := j*8
+			dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
+		_MM_UPCONV_PS_SINT8:
+			n	 := j*8
+			dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
+		_MM_UPCONV_PS_UINT16:
+			n	 := j*16
+			dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
+		_MM_UPCONV_PS_SINT16:
+			n	 := j*16
+			dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
+		ESAC
+	_MM_BROADCAST_1X16:
+		CASE conv OF
+		_MM_UPCONV_PS_NONE:
+			n	 := j*32
+			dst[i+31:i] := addr[31:0]
+		_MM_UPCONV_PS_FLOAT16:
+			n	 := j*16
+			dst[i+31:i] := Convert_FP16_To_FP32(addr[15:0])
+		_MM_UPCONV_PS_UINT8:
+			n	 := j*8
+			dst[i+31:i] := Convert_UInt8_To_FP32(addr[7:0])
+		_MM_UPCONV_PS_SINT8:
+			n	 := j*8
+			dst[i+31:i] := Convert_Int8_To_FP32(addr[7:0])
+		_MM_UPCONV_PS_UINT16:
+			n	 := j*16
+			dst[i+31:i] := Convert_UInt16_To_FP32(addr[15:0])
+		_MM_UPCONV_PS_SINT16:
+			n	 := j*16
+			dst[i+31:i] := Convert_Int16_To_FP32(addr[15:0])
+		ESAC
+	_MM_BROADCAST_4X16:
+		mod := j%4
+		CASE conv OF
+		_MM_UPCONV_PS_NONE:
+			n := mod*32
+			dst[i+31:i] := addr[n+31:n]
+		_MM_UPCONV_PS_FLOAT16:
+			n := mod*16
+			dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
+		_MM_UPCONV_PS_UINT8:
+			n := mod*8
+			dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
+		_MM_UPCONV_PS_SINT8:
+			n := mod*8
+			dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
+		_MM_UPCONV_PS_UINT16:
+			n := mod*16
+			dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
+		_MM_UPCONV_PS_SINT16:
+			n := mod*16
+			dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
+		ESAC
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm, m512" xed="VMOVAPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<instruction name="VBROADCASTF32X4" form="zmm, m512" xed="VBROADCASTF32X4_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<instruction name="VBROADCASTSS" form="zmm, m512" xed="VBROADCASTSS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extload_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="_MM_BROADCAST32_ENUM" varname="bc" etype="UI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 16 elements of type and size determined by "conv" from memory address "mt" and converts all elements to single-precision (32-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		CASE bc OF
+		_MM_BROADCAST32_NONE:
+			CASE conv OF
+			_MM_UPCONV_PS_NONE:
+				n	 := j*32
+				dst[i+31:i] := addr[n+31:n]
+			_MM_UPCONV_PS_FLOAT16:
+				n	 := j*16
+				dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
+			_MM_UPCONV_PS_UINT8:
+				n	 := j*8
+				dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
+			_MM_UPCONV_PS_SINT8:
+				n	 := j*8
+				dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
+			_MM_UPCONV_PS_UINT16:
+				n	 := j*16
+				dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
+			_MM_UPCONV_PS_SINT16:
+				n	 := j*16
+				dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
+			ESAC
+		_MM_BROADCAST_1X16:
+			CASE conv OF
+			_MM_UPCONV_PS_NONE:
+				n	 := j*32
+				dst[i+31:i] := addr[31:0]
+			_MM_UPCONV_PS_FLOAT16:
+				n	 := j*16
+				dst[i+31:i] := Convert_FP16_To_FP32(addr[15:0])
+			_MM_UPCONV_PS_UINT8:
+				n	 := j*8
+				dst[i+31:i] := Convert_UInt8_To_FP32(addr[7:0])
+			_MM_UPCONV_PS_SINT8:
+				n	 := j*8
+				dst[i+31:i] := Convert_Int8_To_FP32(addr[7:0])
+			_MM_UPCONV_PS_UINT16:
+				n	 := j*16
+				dst[i+31:i] := Convert_UInt16_To_FP32(addr[15:0])
+			_MM_UPCONV_PS_SINT16:
+				n	 := j*16
+				dst[i+31:i] := Convert_Int16_To_FP32(addr[15:0])
+			ESAC
+		_MM_BROADCAST_4X16:
+			mod := j%4
+			CASE conv OF
+			_MM_UPCONV_PS_NONE:
+				n := mod*32
+				dst[i+31:i] := addr[n+31:n]
+			_MM_UPCONV_PS_FLOAT16:
+				n := mod*16
+				dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
+			_MM_UPCONV_PS_UINT8:
+				n := mod*8
+				dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
+			_MM_UPCONV_PS_SINT8:
+				n := mod*8
+				dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
+			_MM_UPCONV_PS_UINT16:
+				n := mod*16
+				dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
+			_MM_UPCONV_PS_SINT16:
+				n := mod*16
+				dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
+			ESAC
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPS" form="zmm {k}, m512" xed="VMOVAPS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<instruction name="VBROADCASTF32X4" form="zmm {k}, m512" xed="VBROADCASTF32X4_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<instruction name="VBROADCASTSS" form="zmm {k}, m512" xed="VBROADCASTSS_ZMMf32_MASKmskw_MEMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extload_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="void const *" varname="mt" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="_MM_BROADCAST32_ENUM" varname="bc" etype="UI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 16 elements of type and size determined by "conv" from memory address "mt" and converts all elements to 32-bit integer elements, storing the results in "dst". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	CASE bc OF
+	_MM_BROADCAST32_NONE:
+		CASE conv OF
+		_MM_UPCONV_EPI32_NONE:
+			n	 := j*32
+			dst[i+31:i] := addr[n+31:n]
+		_MM_UPCONV_EPI32_UINT8:
+			n	 := j*8
+			dst[i+31:i] := ZeroExtend32(addr[n+7:n])
+		_MM_UPCONV_EPI32_SINT8:
+			n	 := j*8
+			dst[i+31:i] := SignExtend32(addr[n+7:n])
+		_MM_UPCONV_EPI32_UINT16:
+			n	 := j*16
+			dst[i+31:i] := ZeroExtend32(addr[n+15:n])
+		_MM_UPCONV_EPI32_SINT16:
+			n	 := j*16
+			dst[i+31:i] := SignExtend32(addr[n+15:n])
+		ESAC
+	_MM_BROADCAST_1X16:
+		CASE conv OF
+		_MM_UPCONV_EPI32_NONE:
+			n	 := j*32
+			dst[i+31:i] := addr[31:0]
+		_MM_UPCONV_EPI32_UINT8:
+			n	 := j*8
+			dst[i+31:i] := ZeroExtend32(addr[7:0])
+		_MM_UPCONV_EPI32_SINT8:
+			n	 := j*8
+			dst[i+31:i] := SignExtend32(addr[7:0])
+		_MM_UPCONV_EPI32_UINT16:
+			n	 := j*16
+			dst[i+31:i] := ZeroExtend32(addr[15:0])
+		_MM_UPCONV_EPI32_SINT16:
+			n	 := j*16
+			dst[i+31:i] := SignExtend32(addr[15:0])
+		ESAC
+	_MM_BROADCAST_4X16:
+		mod := j%4
+		CASE conv OF
+		_MM_UPCONV_EPI32_NONE:
+			n := mod*32
+			dst[i+31:i] := addr[n+31:n]
+		_MM_UPCONV_EPI32_UINT8:
+			n := mod*8
+			dst[i+31:i] := ZeroExtend32(addr[n+7:n])
+		_MM_UPCONV_EPI32_SINT8:
+			n := mod*8
+			dst[i+31:i] := SignExtend32(addr[n+7:n])
+		_MM_UPCONV_EPI32_UINT16:
+			n := mod*16
+			dst[i+31:i] := ZeroExtend32(addr[n+15:n])
+		_MM_UPCONV_EPI32_SINT16:
+			n := mod*16
+			dst[i+31:i] := SignExtend32(addr[n+15:n])
+		ESAC
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm, m512" xed="VMOVDQA32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<instruction name="VBROADCASTI32X4" form="zmm, m512" xed="VBROADCASTI32X4_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<instruction name="VPBROADCASTD" form="zmm, m512" xed="VPBROADCASTD_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extload_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="_MM_BROADCAST32_ENUM" varname="bc" etype="UI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 16 elements of type and size determined by "conv" from memory address "mt" and converts all elements to 32-bit integer elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		CASE bc OF
+		_MM_BROADCAST32_NONE:
+			CASE conv OF
+			_MM_UPCONV_EPI32_NONE:
+				n	 := j*32
+				dst[i+31:i] := addr[n+31:n]
+			_MM_UPCONV_EPI32_UINT8:
+				n	 := j*8
+				dst[i+31:i] := ZeroExtend32(addr[n+7:n])
+			_MM_UPCONV_EPI32_SINT8:
+				n	 := j*8
+				dst[i+31:i] := SignExtend32(addr[n+7:n])
+			_MM_UPCONV_EPI32_UINT16:
+				n	 := j*16
+				dst[i+31:i] := ZeroExtend32(addr[n+15:n])
+			_MM_UPCONV_EPI32_SINT16:
+				n	 := j*16
+				dst[i+31:i] := SignExtend32(addr[n+15:n])
+			ESAC
+		_MM_BROADCAST_1X16:
+			CASE conv OF
+			_MM_UPCONV_EPI32_NONE:
+				n	 := j*32
+				dst[i+31:i] := addr[31:0]
+			_MM_UPCONV_EPI32_UINT8:
+				n	 := j*8
+				dst[i+31:i] := ZeroExtend32(addr[7:0])
+			_MM_UPCONV_EPI32_SINT8:
+				n	 := j*8
+				dst[i+31:i] := SignExtend32(addr[7:0])
+			_MM_UPCONV_EPI32_UINT16:
+				n	 := j*16
+				dst[i+31:i] := ZeroExtend32(addr[15:0])
+			_MM_UPCONV_EPI32_SINT16:
+				n	 := j*16
+				dst[i+31:i] := SignExtend32(addr[15:0])
+			ESAC
+		_MM_BROADCAST_4X16:
+			mod := j%4
+			CASE conv OF
+			_MM_UPCONV_EPI32_NONE:
+				n := mod*32
+				dst[i+31:i] := addr[n+31:n]
+			_MM_UPCONV_EPI32_UINT8:
+				n := mod*8
+				dst[i+31:i] := ZeroExtend32(addr[n+7:n])
+			_MM_UPCONV_EPI32_SINT8:
+				n := mod*8
+				dst[i+31:i] := SignExtend32(addr[n+7:n])
+			_MM_UPCONV_EPI32_UINT16:
+				n := mod*16
+				dst[i+31:i] := ZeroExtend32(addr[n+15:n])
+			_MM_UPCONV_EPI32_SINT16:
+				n := mod*16
+				dst[i+31:i] := SignExtend32(addr[n+15:n])
+			ESAC
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA32" form="zmm {k}, m512" xed="VMOVDQA32_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<instruction name="VBROADCASTI32X4" form="zmm {k}, m512" xed="VBROADCASTI32X4_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<instruction name="VPBROADCASTD" form="zmm {k}, m512" xed="VPBROADCASTD_ZMMu32_MASKmskw_MEMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extload_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="void const *" varname="mt" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="_MM_BROADCAST64_ENUM" varname="bc" etype="UI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 8 elements of type and size determined by "conv" from memory address "mt" and converts all elements to double-precision (64-bit) floating-point elements, storing the results in "dst". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	CASE bc OF
+	_MM_BROADCAST64_NONE:
+		CASE conv OF
+		_MM_UPCONV_PD_NONE:
+			n := j*64
+			dst[i+63:i] := addr[n+63:n]
+		ESAC
+	_MM_BROADCAST_1X8:
+		CASE conv OF
+		_MM_UPCONV_PD_NONE:
+			n := j*64
+			dst[i+63:i] := addr[63:0]
+		ESAC
+	_MM_BROADCAST_4X8:
+		mod := j%4
+		CASE conv OF
+		_MM_UPCONV_PD_NONE:
+			n := mod*64
+			dst[i+63:i] := addr[n+63:n]
+		ESAC
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm, m512" xed="VMOVAPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<instruction name="VBROADCASTF64X4" form="zmm, m512" xed="VBROADCASTF64X4_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<instruction name="VBROADCASTSD" form="zmm, m512" xed="VBROADCASTSD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extload_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="_MM_BROADCAST64_ENUM" varname="bc" etype="UI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 8 elements of type and size determined by "conv" from memory address "mt" and converts all elements to double-precision (64-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		CASE bc OF
+		_MM_BROADCAST64_NONE:
+			CASE conv OF
+			_MM_UPCONV_PD_NONE:
+				n := j*64
+				dst[i+63:i] := addr[n+63:n]
+			ESAC
+		_MM_BROADCAST_1X8:
+			CASE conv OF
+			_MM_UPCONV_PD_NONE:
+				n := j*64
+				dst[i+63:i] := addr[63:0]
+			ESAC
+		_MM_BROADCAST_4X8:
+			mod := j%4
+			CASE conv OF
+			_MM_UPCONV_PD_NONE:
+				n := mod*64
+				dst[i+63:i] := addr[n+63:n]
+			ESAC
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVAPD" form="zmm {k}, m512" xed="VMOVAPD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<instruction name="VBROADCASTF64X4" form="zmm {k}, m512" xed="VBROADCASTF64X4_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<instruction name="VBROADCASTSD" form="zmm {k}, m512" xed="VBROADCASTSD_ZMMf64_MASKmskw_MEMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extload_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="void const *" varname="mt" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="_MM_BROADCAST64_ENUM" varname="bc" etype="UI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 8 elements of type and size determined by "conv" from memory address "mt" and converts all elements to 64-bit integer elements, storing the results in "dst". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	CASE bc OF
+	_MM_BROADCAST64_NONE:
+		CASE conv OF
+		_MM_UPCONV_EPI64_NONE:
+			n := j*64
+			dst[i+63:i] := addr[n+63:n]
+		ESAC
+	_MM_BROADCAST_1X8:
+		CASE conv OF
+		_MM_UPCONV_EPI64_NONE:
+			n := j*64
+			dst[i+63:i] := addr[63:0]
+		ESAC
+	_MM_BROADCAST_4X8:
+		mod := j%4
+		CASE conv OF
+		_MM_UPCONV_EPI64_NONE:
+			n := mod*64
+			dst[i+63:i] := addr[n+63:n]
+		ESAC
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm, m512" xed="VMOVDQA64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<instruction name="VBROADCASTI64X4" form="zmm, m512" xed="VBROADCASTI64X4_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<instruction name="VPBROADCASTQ" form="zmm, m512" xed="VPBROADCASTQ_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extload_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="_MM_BROADCAST64_ENUM" varname="bc" etype="UI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Depending on "bc", loads 1, 4, or 8 elements of type and size determined by "conv" from memory address "mt" and converts all elements to 64-bit integer elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		CASE bc OF
+		_MM_BROADCAST64_NONE:
+			CASE conv OF
+			_MM_UPCONV_EPI64_NONE:
+				n := j*64
+				dst[i+63:i] := addr[n+63:n]
+			ESAC
+		_MM_BROADCAST_1X8:
+			CASE conv OF
+			_MM_UPCONV_EPI64_NONE:
+				n := j*64
+				dst[i+63:i] := addr[63:0]
+			ESAC
+		_MM_BROADCAST_4X8:
+			mod := j%4
+			CASE conv OF
+			_MM_UPCONV_EPI64_NONE:
+				n := mod*64
+				dst[i+63:i] := addr[n+63:n]
+			ESAC
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VMOVDQA64" form="zmm {k}, m512" xed="VMOVDQA64_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<instruction name="VBROADCASTI64X4" form="zmm {k}, m512" xed="VBROADCASTI64X4_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<instruction name="VPBROADCASTQ" form="zmm {k}, m512" xed="VPBROADCASTQ_ZMMu64_MASKmskw_MEMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_swizzle_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v" etype="FP32"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI32"/>
+	<description>Performs a swizzle transformation of each of the four groups of packed 4x single-precision (32-bit) floating-point elements in "v" using swizzle parameter "s", storing the results in "dst".</description>
+	<operation>CASE s OF
+_MM_SWIZ_REG_NONE:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_DCBA:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 7
+		i := j*64
+		dst[i+31:i]    := v[i+63:i+32]
+		dst[i+63:i+32] := v[i+31:i]
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]     := v[i+95:i+64]
+		dst[i+63:i+32]  := v[i+127:i+96]
+		dst[i+95:i+64]  := v[i+31:i]
+		dst[i+127:i+96] := v[i+63:i+32]
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]     := v[i+31:i]
+		dst[i+63:i+32]  := v[i+31:i]
+		dst[i+95:i+64]  := v[i+31:i]
+		dst[i+127:i+96] := v[i+31:i]
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]     := v[i+63:i+32]
+		dst[i+63:i+32]  := v[i+63:i+32]
+		dst[i+95:i+64]  := v[i+63:i+32]
+		dst[i+127:i+96] := v[i+63:i+32]
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]     := v[i+95:i+64]
+		dst[i+63:i+32]  := v[i+95:i+64]
+		dst[i+95:i+64]  := v[i+95:i+64]
+		dst[i+127:i+96] := v[i+95:i+64]
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]     := v[i+127:i+96]
+		dst[i+63:i+32]  := v[i+127:i+96]
+		dst[i+95:i+64]  := v[i+127:i+96]
+		dst[i+127:i+96] := v[i+127:i+96]
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]     := v[i+63:i+32]
+		dst[i+63:i+32]  := v[i+95:i+64]
+		dst[i+95:i+64]  := v[i+31:i]
+		dst[i+127:i+96] := v[i+127:i+96]
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_swizzle_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v" etype="FP64"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI64"/>
+	<description>Performs a swizzle transformation of each of the two groups of packed 4x double-precision (64-bit) floating-point elements in "v" using swizzle parameter "s", storing the results in "dst".</description>
+	<operation>CASE s OF
+_MM_SWIZ_REG_NONE:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_DCBA:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+63:i]     := v[i+127:i+64]
+		dst[i+127:i+64] := v[i+63:i]
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]      := v[i+191:i+128]
+		dst[i+127:i+64]  := v[i+255:i+192]
+		dst[i+191:i+128] := v[i+63:i]
+		dst[i+255:i+192] := v[i+127:i+64]
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]      := v[i+63:i]
+		dst[i+127:i+64]  := v[i+63:i]
+		dst[i+191:i+128] := v[i+63:i]
+		dst[i+255:i+192] := v[i+63:i]
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]      := v[i+127:i+64]
+		dst[i+127:i+64]  := v[i+127:i+64]
+		dst[i+191:i+128] := v[i+127:i+64]
+		dst[i+255:i+192] := v[i+127:i+64]
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]      := v[i+191:i+128]
+		dst[i+127:i+64]  := v[i+191:i+128]
+		dst[i+191:i+128] := v[i+191:i+128]
+		dst[i+255:i+192] := v[i+191:i+128]
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+255:i+192]
+		dst[i+127:i+64]  := v[i+255:i+192]
+		dst[i+191:i+128] := v[i+255:i+192]
+		dst[i+255:i+192] := v[i+255:i+192]
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+127:i+64]
+		dst[i+127:i+64]  := v[i+191:i+128]
+		dst[i+191:i+128] := v[i+63:i]
+		dst[i+255:i+192] := v[i+255:i+192]
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_swizzle_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v" etype="UI32"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI32"/>
+	<description>Performs a swizzle transformation of each of the four groups of packed 4x 32-bit integer elements in "v" using swizzle parameter "s", storing the results in "dst".</description>
+	<operation>CASE s OF
+_MM_SWIZ_REG_NONE:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_DCBA:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 7
+		i := j*64
+		dst[i+31:i]    := v[i+63:i+32]
+		dst[i+63:i+32] := v[i+31:i]
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]	    := v[i+95:i+64]
+		dst[i+63:i+32]  := v[i+127:i+96]
+		dst[i+95:i+64]  := v[i+31:i]
+		dst[i+127:i+96] := v[i+63:i+32]
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]	    := v[i+31:i]
+		dst[i+63:i+32]  := v[i+31:i]
+		dst[i+95:i+64]  := v[i+31:i]
+		dst[i+127:i+96] := v[i+31:i]
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]	    := v[i+63:i+32]
+		dst[i+63:i+32]  := v[i+63:i+32]
+		dst[i+95:i+64]  := v[i+63:i+32]
+		dst[i+127:i+96] := v[i+63:i+32]
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]	    := v[i+95:i+64]
+		dst[i+63:i+32]  := v[i+95:i+64]
+		dst[i+95:i+64]  := v[i+95:i+64]
+		dst[i+127:i+96] := v[i+95:i+64]
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]	    := v[i+127:i+96]
+		dst[i+63:i+32]  := v[i+127:i+96]
+		dst[i+95:i+64]  := v[i+127:i+96]
+		dst[i+127:i+96] := v[i+127:i+96]
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+31:i]	    := v[i+63:i+32]
+		dst[i+63:i+32]  := v[i+95:i+64]
+		dst[i+95:i+64]  := v[i+31:i]
+		dst[i+127:i+96] := v[i+127:i+96]
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_swizzle_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="v" etype="UI64"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI64"/>
+	<description>Performs a swizzle transformation of each of the two groups of packed 4x64-bit integer elements in "v" using swizzle parameter "s", storing the results in "dst".</description>
+	<operation>CASE s OF
+_MM_SWIZ_REG_NONE:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_DCBA:
+	dst[511:0] := v[511:0]
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 3
+		i := j*128
+		dst[i+63:i]	    := v[i+127:i+64]
+		dst[i+127:i+64] := v[i+63:i]
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+191:i+128]
+		dst[i+127:i+64]  := v[i+255:i+192]
+		dst[i+191:i+128] := v[i+63:i]
+		dst[i+255:i+192] := v[i+127:i+64]
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+63:i]
+		dst[i+127:i+64]  := v[i+63:i]
+		dst[i+191:i+128] := v[i+63:i]
+		dst[i+255:i+192] := v[i+63:i]
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+127:i+64]
+		dst[i+127:i+64]  := v[i+127:i+64]
+		dst[i+191:i+128] := v[i+127:i+64]
+		dst[i+255:i+192] := v[i+127:i+64]
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+191:i+128]
+		dst[i+127:i+64]  := v[i+191:i+128]
+		dst[i+191:i+128] := v[i+191:i+128]
+		dst[i+255:i+192] := v[i+191:i+128]
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+255:i+192]
+		dst[i+127:i+64]  := v[i+255:i+192]
+		dst[i+191:i+128] := v[i+255:i+192]
+		dst[i+255:i+192] := v[i+255:i+192]
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 1
+		i := j*256
+		dst[i+63:i]	     := v[i+127:i+64]
+		dst[i+127:i+64]  := v[i+191:i+128]
+		dst[i+191:i+128] := v[i+63:i]
+		dst[i+255:i+192] := v[i+255:i+192]
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_swizzle_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v" etype="FP32"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI32"/>
+	<description>Performs a swizzle transformation of each of the four groups of packed 4x single-precision (32-bit) floating-point elements in "v" using swizzle parameter "s", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>CASE s OF
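+_MM_SWIZ_REG_NONE:
+	FOR j := 0 to 15
+		i := j*32
+		IF k[j]
+			dst[i+31:i] := v[i+31:i]
+		ELSE
+			dst[i+31:i] := src[i+31:i]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DCBA:
+	FOR j := 0 to 15
+		i := j*32
+		IF k[j]
+			dst[i+31:i] := v[i+31:i]
+		ELSE
+			dst[i+31:i] := src[i+31:i]
+		FI
+	ENDFOR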
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 7
+		i := j*64
+		IF k[j*2]
+			dst[i+31:i]	:= v[i+63:i+32]
+		ELSE
+			dst[i+31:i]	:= src[i+31:i]
+		FI
+		IF k[j*2+1]
+			dst[i+63:i+32] := v[i+31:i]
+		ELSE
+			dst[i+63:i+32] := src[i+63:i+32]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+95:i+64]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+127:i+96]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+31:i]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+63:i+32]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+31:i]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+31:i]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+31:i]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+31:i]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+63:i+32]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+63:i+32]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+63:i+32]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+63:i+32]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+95:i+64]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+95:i+64]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+95:i+64]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+95:i+64]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+127:i+96]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+127:i+96]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+127:i+96]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+127:i+96]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+63:i+32]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+95:i+64]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+31:i]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+127:i+96]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_swizzle_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v" etype="FP64"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI64"/>
+	<description>Performs a swizzle transformation of each of the two groups of packed 4x double-precision (64-bit) floating-point elements in "v" using swizzle parameter "s", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>CASE s OF
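+_MM_SWIZ_REG_NONE:
+	FOR j := 0 to 7
+		i := j*64
+		IF k[j]
+			dst[i+63:i] := v[i+63:i]
+		ELSE
+			dst[i+63:i] := src[i+63:i]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DCBA:
+	FOR j := 0 to 7
+		i := j*64
+		IF k[j]
+			dst[i+63:i] := v[i+63:i]
+		ELSE
+			dst[i+63:i] := src[i+63:i]
+		FI
+	ENDFOR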
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*2]
+			dst[i+63:i]	 := v[i+127:i+64]
+		ELSE
+			dst[i+63:i]	 := src[i+63:i]
+		FI
+		IF k[j*2+1]
+			dst[i+127:i+64] := v[i+63:i]
+		ELSE
+			dst[i+127:i+64] := src[i+127:i+64]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+191:i+128]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+255:i+192]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+63:i]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+127:i+64]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+63:i]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+63:i]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+63:i]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+63:i]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+127:i+64]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+127:i+64]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+127:i+64]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+127:i+64]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+191:i+128]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+191:i+128]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+191:i+128]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+191:i+128]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+255:i+192]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+255:i+192]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+255:i+192]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+255:i+192]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+127:i+64]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+191:i+128]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+63:i]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+255:i+192]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_swizzle_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v" etype="UI32"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI32"/>
+	<description>Performs a swizzle transformation of each of the four groups of packed 4x32-bit integer elements in "v" using swizzle parameter "s", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>CASE s OF
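+_MM_SWIZ_REG_NONE:
+	FOR j := 0 to 15
+		i := j*32
+		IF k[j]
+			dst[i+31:i] := v[i+31:i]
+		ELSE
+			dst[i+31:i] := src[i+31:i]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DCBA:
+	FOR j := 0 to 15
+		i := j*32
+		IF k[j]
+			dst[i+31:i] := v[i+31:i]
+		ELSE
+			dst[i+31:i] := src[i+31:i]
+		FI
+	ENDFOR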
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 7
+		i := j*64
+		IF k[j*2]
+			dst[i+31:i]	:= v[i+63:i+32]
+		ELSE
+			dst[i+31:i]	:= src[i+31:i]
+		FI
+		IF k[j*2+1]
+			dst[i+63:i+32] := v[i+31:i]
+		ELSE
+			dst[i+63:i+32] := src[i+63:i+32]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+95:i+64]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+127:i+96]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+31:i]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+63:i+32]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+31:i]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+31:i]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+31:i]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+31:i]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+63:i+32]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+63:i+32]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+63:i+32]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+63:i+32]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+95:i+64]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+95:i+64]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+95:i+64]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+95:i+64]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+127:i+96]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+127:i+96]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+127:i+96]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+127:i+96]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*4]
+			dst[i+31:i]	 := v[i+63:i+32]
+		ELSE
+			dst[i+31:i]	 := src[i+31:i]
+		FI
+		IF k[j*4+1]
+			dst[i+63:i+32]  := v[i+95:i+64]
+		ELSE
+			dst[i+63:i+32]  := src[i+63:i+32]
+		FI
+		IF k[j*4+2]
+			dst[i+95:i+64]  := v[i+31:i]
+		ELSE
+			dst[i+95:i+64]  := src[i+95:i+64]
+		FI
+		IF k[j*4+3]
+			dst[i+127:i+96] := v[i+127:i+96]
+		ELSE
+			dst[i+127:i+96] := src[i+127:i+96]
+		FI
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_swizzle_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v" etype="UI64"/>
+	<parameter type="_MM_SWIZZLE_ENUM" varname="s" etype="UI64"/>
+	<description>Performs a swizzle transformation of each of the two groups of packed 4x64-bit integer elements in "v" using swizzle parameter "s", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>CASE s OF
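+_MM_SWIZ_REG_NONE:
+	FOR j := 0 to 7
+		i := j*64
+		IF k[j]
+			dst[i+63:i] := v[i+63:i]
+		ELSE
+			dst[i+63:i] := src[i+63:i]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DCBA:
+	FOR j := 0 to 7
+		i := j*64
+		IF k[j]
+			dst[i+63:i] := v[i+63:i]
+		ELSE
+			dst[i+63:i] := src[i+63:i]
+		FI
+	ENDFOR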
+_MM_SWIZ_REG_CDAB:
+	FOR j := 0 to 3
+		i := j*128
+		IF k[j*2]
+			dst[i+63:i]	 := v[i+127:i+64]
+		ELSE
+			dst[i+63:i]	 := src[i+63:i]
+		FI
+		IF k[j*2+1]
+			dst[i+127:i+64] := v[i+63:i]
+		ELSE
+			dst[i+127:i+64] := src[i+127:i+64]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BADC:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+191:i+128]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+255:i+192]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+63:i]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+127:i+64]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_AAAA:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+63:i]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+63:i]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+63:i]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+63:i]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_BBBB:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+127:i+64]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+127:i+64]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+127:i+64]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+127:i+64]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_CCCC:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+191:i+128]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+191:i+128]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+191:i+128]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+191:i+128]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DDDD:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+255:i+192]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+255:i+192]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+255:i+192]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+255:i+192]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+_MM_SWIZ_REG_DACB:
+	FOR j := 0 to 1
+		i := j*256
+		IF k[j*4]
+			dst[i+63:i]	  := v[i+127:i+64]
+		ELSE
+			dst[i+63:i]	  := src[i+63:i]
+		FI
+		IF k[j*4+1]
+			dst[i+127:i+64]  := v[i+191:i+128]
+		ELSE
+			dst[i+127:i+64]  := src[i+127:i+64]
+		FI
+		IF k[j*4+2]
+			dst[i+191:i+128] := v[i+63:i]
+		ELSE
+			dst[i+191:i+128] := src[i+191:i+128]
+		FI
+		IF k[j*4+3]
+			dst[i+255:i+192] := v[i+255:i+192]
+		ELSE
+			dst[i+255:i+192] := src[i+255:i+192]
+		FI
+	ENDFOR
+ESAC
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extstore_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32"/>
+	<parameter type="__m512" varname="v" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed single-precision (32-bit) floating-point elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+addr := MEM[mt]		
+FOR j := 0 to 15
+	i := j*32
+	CASE conv OF
+	_MM_DOWNCONV_PS_NONE:
+		addr[i+31:i] := v[i+31:i]
+	_MM_DOWNCONV_PS_FLOAT16:
+		n := j*16
+		addr[n+15:n] := Convert_FP32_To_FP16(v[i+31:i])
+	_MM_DOWNCONV_PS_UINT8:
+		n := j*8
+		addr[n+7:n] := Convert_FP32_To_UInt8(v[i+31:i])
+	_MM_DOWNCONV_PS_SINT8:
+		n := j*8
+		addr[n+7:n] := Convert_FP32_To_Int8(v[i+31:i])
+	_MM_DOWNCONV_PS_UINT16:
+		n := j*16
+		addr[n+15:n] := Convert_FP32_To_UInt16(v[i+31:i])
+	_MM_DOWNCONV_PS_SINT16:
+		n := j*16
+		addr[n+15:n] := Convert_FP32_To_Int16(v[i+31:i])
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPS" form="m512, zmm" xed="VMOVAPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extstore_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI32"/>
+	<parameter type="__m512i" varname="v" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed 32-bit integer elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	CASE conv OF
+	_MM_DOWNCONV_EPI32_NONE:
+		addr[i+31:i] := v[i+31:i]
+	_MM_DOWNCONV_EPI32_UINT8:
+		n := j*8
+		addr[n+7:n] := Int32ToUInt8(v[i+31:i])
+	_MM_DOWNCONV_EPI32_SINT8:
+		n := j*8
+		addr[n+7:n] := Int32ToSInt8(v[i+31:i])
+	_MM_DOWNCONV_EPI32_UINT16:
+		n := j*16
+		addr[n+15:n] := Int32ToUInt16(v[i+31:i])
+	_MM_DOWNCONV_EPI32_SINT16:
+		n := j*16
+		addr[n+15:n] := Int32ToSInt16(v[i+31:i])
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA32" form="m512, zmm" xed="VMOVDQA32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extstore_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64"/>
+	<parameter type="__m512d" varname="v" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed double-precision (64-bit) floating-point elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	CASE conv OF
+	_MM_DOWNCONV_PD_NONE:
+		addr[i+63:i] := v[i+63:i]
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPD" form="m512, zmm" xed="VMOVAPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extstore_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI64"/>
+	<parameter type="__m512i" varname="v" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed 64-bit integer elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	CASE conv OF
+	_MM_DOWNCONV_EPI64_NONE: addr[i+63:i] := v[i+63:i]
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA64" form="m512, zmm" xed="VMOVDQA64_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extstore_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed single-precision (32-bit) floating-point elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt" using writemask "k" (elements are not written to memory when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_PS_NONE:
+			mt[i+31:i] := v[i+31:i]
+		_MM_DOWNCONV_PS_FLOAT16:
+			n := j*16
+			mt[n+15:n] := Convert_FP32_To_FP16(v[i+31:i])
+		_MM_DOWNCONV_PS_UINT8:
+			n := j*8
+			mt[n+7:n] := Convert_FP32_To_UInt8(v[i+31:i])
+		_MM_DOWNCONV_PS_SINT8:
+			n := j*8
+			mt[n+7:n] := Convert_FP32_To_Int8(v[i+31:i])
+		_MM_DOWNCONV_PS_UINT16:
+			n := j*16
+			mt[n+15:n] := Convert_FP32_To_UInt16(v[i+31:i])
+		_MM_DOWNCONV_PS_SINT16:
+			n := j*16
+			mt[n+15:n] := Convert_FP32_To_Int16(v[i+31:i])
+		ESAC
+	 FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPS" form="m512 {k}, zmm" xed="VMOVAPS_MEMf32_MASKmskw_ZMMf32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extstore_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed double-precision (64-bit) floating-point elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt" (elements in "mt" are unaltered when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+addr := MEM[mt]		
+FOR j := 0 to 7
+	i := j*64
+	CASE conv OF
+	_MM_DOWNCONV_PD_NONE:
+		IF k[j]
+			addr[i+63:i] := v[i+63:i]
+		FI
+	ESAC
+ENDFOR
+	</operation>
+	<instruction name="VMOVAPD" form="m512 {k}, zmm" xed="VMOVAPD_MEMf64_MASKmskw_ZMMf64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extstore_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed 32-bit integer elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt" (elements in "mt" are unaltered when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_EPI32_NONE:
+			addr[i+31:i] := v[i+31:i]
+		_MM_DOWNCONV_EPI32_UINT8:
+			n := j*8
+			addr[n+7:n] := Int32ToUInt8(v[i+31:i])
+		_MM_DOWNCONV_EPI32_SINT8:
+			n := j*8
+			addr[n+7:n] := Int32ToSInt8(v[i+31:i])
+		_MM_DOWNCONV_EPI32_UINT16:
+			n := j*16
+			addr[n+15:n] := Int32ToUInt16(v[i+31:i])
+		_MM_DOWNCONV_EPI32_SINT16:
+			n := j*16
+			addr[n+15:n] := Int32ToSInt16(v[i+31:i])
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA32" form="m512 {k}, zmm" xed="VMOVDQA32_MEMu32_MASKmskw_ZMMu32_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extstore_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Downconverts packed 64-bit integer elements stored in "v" to a smaller type depending on "conv" and stores them in memory location "mt" (elements in "mt" are unaltered when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_EPI64_NONE: addr[i+63:i] := v[i+63:i]
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VMOVDQA64" form="m512 {k}, zmm" xed="VMOVDQA64_MEMu64_MASKmskw_ZMMu64_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_storenr_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="v" etype="FP32"/>
+	<description>Stores packed single-precision (32-bit) floating-point elements from "v" to memory address "mt" with a no-read hint to the processor.</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	addr[i+31:i] := v[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VMOVNRAPS" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_storenr_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="v" etype="FP64"/>
+	<description>Stores packed double-precision (64-bit) floating-point elements from "v" to memory address "mt" with a no-read hint to the processor.</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	addr[i+63:i] := v[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VMOVNRAPD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_storenrngo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="v" etype="FP32"/>
+	<description>Stores packed single-precision (32-bit) floating-point elements from "v" to memory address "mt" with a no-read hint and using a weakly-ordered memory consistency model (stores performed with this function are not globally ordered, and subsequent stores from the same thread can be observed before them).</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 15
+	i := j*32
+	addr[i+31:i] := v[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="VMOVNRNGOAPS" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_storenrngo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="v" etype="FP64"/>
+	<description>Stores packed double-precision (64-bit) floating-point elements from "v" to memory address "mt" with a no-read hint and using a weakly-ordered memory consistency model (stores performed with this function are not globally ordered, and subsequent stores from the same thread can be observed before them).</description>
+	<operation>
+addr := MEM[mt]
+FOR j := 0 to 7
+	i := j*64
+	addr[i+63:i] := v[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VMOVNRNGOAPD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_adc_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="k2_res" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element addition of packed 32-bit integers in "v2" and "v3" and the corresponding bit in "k2", storing the result of the addition in "dst" and the result of the carry in "k2_res".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	k2_res[j]   := Carry(v2[i+31:i] + v3[i+31:i] + k2[j])
+	dst[i+31:i] := v2[i+31:i] + v3[i+31:i] + k2[j]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADCD" form="zmm, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_adc_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="k2_res" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element addition of packed 32-bit integers in "v2" and "v3" and the corresponding bit in "k2", storing the result of the addition in "dst" and the result of the carry in "k2_res" using writemask "k1" (elements are copied from "v2" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		k2_res[j]   := Carry(v2[i+31:i] + v3[i+31:i] + k2[j])
+		dst[i+31:i] := v2[i+31:i] + v3[i+31:i] + k2[j]
+	ELSE
+		dst[i+31:i] := v2[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADCD" form="zmm {k}, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addn_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<description>Performs element-by-element addition between packed double-precision (64-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := -(v2[i+63:i] + v3[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addn_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<description>Performs element-by-element addition between packed double-precision (64-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(v2[i+63:i] + v3[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addn_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<description>Performs element-by-element addition between packed single-precision (32-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := -(v2[i+31:i] + v3[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addn_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<description>Performs element-by-element addition between packed single-precision (32-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(v2[i+31:i] + v3[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addn_round_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element addition between packed double-precision (64-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := -(v2[i+63:i] + v3[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addn_round_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element addition between packed double-precision (64-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := -(v2[i+63:i] + v3[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addn_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element addition between packed single-precision (32-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := -(v2[i+31:i] + v3[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addn_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element addition between packed single-precision (32-bit) floating-point elements in "v2" and "v3" and negates their sum, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := -(v2[i+31:i] + v3[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDNPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subr_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<description>Performs element-by-element subtraction of packed double-precision (64-bit) floating-point elements in "v2" from "v3", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := v3[i+63:i] - v2[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subr_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<description>Performs element-by-element subtraction of packed double-precision (64-bit) floating-point elements in "v2" from "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := v3[i+63:i] - v2[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subr_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<description>Performs element-by-element subtraction of packed single-precision (32-bit) floating-point elements in "v2" from "v3", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subr_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<description>Performs element-by-element subtraction of packed single-precision (32-bit) floating-point elements in "v2" from "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subr_round_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element subtraction of packed double-precision (64-bit) floating-point elements in "v2" from "v3", storing the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := v3[i+63:i] - v2[i+63:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subr_round_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512d" varname="v3" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element subtraction of packed double-precision (64-bit) floating-point elements in "v2" from "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := v3[i+63:i] - v2[i+63:i]
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subr_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element subtraction of packed single-precision (32-bit) floating-point elements in "v2" from "v3", storing the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subr_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element subtraction of packed single-precision (32-bit) floating-point elements in "v2" from "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSUBRPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subr_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<description>Performs element-by-element subtraction of packed 32-bit integer elements in "v2" from "v3", storing the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBRD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subr_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<description>Performs element-by-element subtraction of packed 32-bit integer elements in "v2" from "v3", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBRD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addsetc_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="k2_res" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element addition of packed 32-bit integer elements in "v2" and "v3", storing the resultant carry in "k2_res" (carry flag) and the addition results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+	k2_res[j] := Carry(v2[i+31:i] + v3[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSETCD" form="zmm, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addsetc_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k_old" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="k2_res" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element addition of packed 32-bit integer elements in "v2" and "v3", storing the resultant carry in "k2_res" (carry flag) and the addition results in "dst" using writemask "k" (elements are copied from "v2" and "k_old" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+	ELSE
+		dst[i+31:i] := v2[i+31:i]
+		k2_res[j] := k_old[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSETCD" form="zmm {k}, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addsets_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="sign" etype="MASK" memwidth="16"/>
+	<description>Performs an element-by-element addition of packed 32-bit integer elements in "v2" and "v3", storing the results in "dst" and the sign of the sum in "sign" (sign flag).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+	sign[j] := dst[i+31]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSETSD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addsets_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="sign" etype="MASK" memwidth="16"/>
+	<description>Performs an element-by-element addition of packed 32-bit integer elements in "v2" and "v3", storing the results in "dst" and the sign of the sum in "sign" (sign flag). Results are stored using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+		sign[j] := dst[i+31]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPADDSETSD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addsets_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="__mmask16 *" varname="sign" etype="MASK" memwidth="16"/>
+	<description>Performs an element-by-element addition of packed single-precision (32-bit) floating-point elements in "v2" and "v3", storing the results in "dst" and the sign of the sum in "sign" (sign flag).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+	sign[j] := dst[i+31]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDSETSPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addsets_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="__mmask16 *" varname="sign" etype="MASK" memwidth="16"/>
+	<description>Performs an element-by-element addition of packed single-precision (32-bit) floating-point elements in "v2" and "v3", storing the results in "dst" and the sign of the sum in "sign" (sign flag). Results are stored using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+		sign[j] := dst[i+31]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDSETSPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_addsets_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="__mmask16 *" varname="sign" etype="MASK" memwidth="16"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs an element-by-element addition of packed single-precision (32-bit) floating-point elements in "v2" and "v3", storing the results in "dst" and the sign of the sum in "sign" (sign flag).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+	sign[j] := dst[i+31]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDSETSPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_addsets_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512" varname="v3" etype="FP32"/>
+	<parameter type="__mmask16 *" varname="sign" etype="MASK" memwidth="16"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs an element-by-element addition of packed single-precision (32-bit) floating-point elements in "v2" and "v3", storing the results in "dst" and the sign of the sum in "sign" (sign flag). Results are stored using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v2[i+31:i] + v3[i+31:i]
+		sign[j] := v2[i+31:i] &amp; v3[i+31:i] &amp; 0x80000000
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VADDSETSPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subsetb_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element subtraction of packed 32-bit integer elements in "v3" from "v2", storing the results in "dst" and the nth borrow bit in the nth position of "borrow" (borrow flag).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i] - v3[i+31:i]
+	borrow[j] := Borrow(v2[i+31:i] - v3[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSETBD" form="zmm, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subsetb_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k_old" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element subtraction of packed 32-bit integer elements in "v3" from "v2", storing the results in "dst" and the nth borrow bit in the nth position of "borrow" (borrow flag). Results are stored using writemask "k" (elements are copied from "v2" and "k_old" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := v2[i+31:i] - v3[i+31:i]
+		borrow[j] := Borrow(v2[i+31:i] - v3[i+31:i])
+	ELSE
+		dst[i+31:i] := v2[i+31:i]
+		borrow[j] := k_old[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBSETBD" form="zmm {k}, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_subrsetb_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element subtraction of packed 32-bit integer elements in "v2" from "v3", storing the results in "dst" and "v2". The borrowed value from the subtraction difference for the nth element is written to the nth bit of "borrow" (borrow flag).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v3[i+31:i] - v2[i+31:i]
+	borrow[j] := Borrow(v3[i+31:i] - v2[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBRSETBD" form="zmm, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_subrsetb_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k_old" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element subtraction of packed 32-bit integer elements in "v2" from "v3", storing the results in "dst" and "v2". The borrowed value from the subtraction difference for the nth element is written to the nth bit of "borrow" (borrow flag). Results are written using writemask "k" (borrow bits are copied from "k_old" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		diff := v3[i+31:i] - v2[i+31:i]
+		borrow[j] := Borrow(v3[i+31:i] - v2[i+31:i])
+		dst[i+31:i] := diff
+		v2[i+31:i] := diff
+	ELSE
+		borrow[j] := k_old[j]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSUBRSETBD" form="zmm {k}, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_sbb_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element three-input subtraction of packed 32-bit integer elements of "v3" as well as the corresponding bit from "k" from "v2". The borrowed value from the subtraction difference for the nth element is written to the nth bit of "borrow" (borrow flag). Results are stored in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i] - v3[i+31:i] - k[j]
+	borrow[j] := Borrow(v2[i+31:i] - v3[i+31:i] - k[j])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSBBD" form="zmm, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_sbb_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element three-input subtraction of packed 32-bit integer elements of "v3" as well as the corresponding bit from "k2" from "v2". The borrowed value from the subtraction difference for the nth element is written to the nth bit of "borrow" (borrow flag). Results are stored in "dst" using writemask "k1" (elements are copied from "v2" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		dst[i+31:i] := v2[i+31:i] - v3[i+31:i] - k2[j]
+		borrow[j] := Borrow(v2[i+31:i] - v3[i+31:i] - k2[j])
+	ELSE
+		dst[i+31:i] := v2[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSBBD" form="zmm {k}, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_sbbr_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element three-input subtraction of packed 32-bit integer elements of "v2" as well as the corresponding bit from "k" from "v3". The borrowed value from the subtraction difference for the nth element is written to the nth bit of "borrow" (borrow flag). Results are stored in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v3[i+31:i] - v2[i+31:i] - k[j]
+	borrow[j] := Borrow(v3[i+31:i] - v2[i+31:i] - k[j])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSBBRD" form="zmm, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_sbbr_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<parameter type="__mmask16 *" varname="borrow" etype="MASK" memwidth="16"/>
+	<description>Performs element-by-element three-input subtraction of packed 32-bit integer elements of "v2" as well as the corresponding bit from "k2" from "v3". The borrowed value from the subtraction difference for the nth element is written to the nth bit of "borrow" (borrow flag). Results are stored in "dst" using writemask "k1" (elements are copied from "v2" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k1[j]
+		dst[i+31:i] := v3[i+31:i] - v2[i+31:i] - k2[j]
+		borrow[j] := Borrow(v3[i+31:i] - v2[i+31:i] - k2[j])
+	ELSE
+		dst[i+31:i] := v2[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPSBBRD" form="zmm {k}, k, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvt_roundpd_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to packed single-precision (32-bit) floating-point elements, storing the results in "dst". Results are written to the lower half of "dst", and the upper half locations are set to '0'.
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k := j*32
+	dst[k+31:k] := Convert_FP64_To_FP32(v2[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_cvt_roundpd_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to packed single-precision (32-bit) floating-point elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Results are written to the lower half of "dst", and the upper half locations are set to '0'.
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_FP64_To_FP32(v2[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTPD2PS" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvtfxpnt_roundpd_epu32lo">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to packed 32-bit unsigned integer elements, storing the results in "dst". Results are written to the lower half of "dst", and the upper half locations are set to '0'.
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k := j*32
+	dst[k+31:k] := Convert_FP64_To_Int32(v2[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTPD2UDQ" form="zmm, zmm, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_cvtfxpnt_roundpd_epu32lo">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs element-by-element conversion of packed double-precision (64-bit) floating-point elements in "v2" to packed 32-bit unsigned integer elements, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Results are written to the lower half of "dst", and the upper half locations are set to '0'.
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_FP64_To_Int32(v2[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTPD2UDQ" form="zmm {k}, zmm, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvtfxpnt_round_adjustps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element conversion of packed single-precision (32-bit) floating-point elements in "v2" to packed 32-bit integer elements and performs an optional exponent adjust using "expadj", storing the results in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i]
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+	ESAC
+	dst[i+31:i] := Float32ToInt32(dst[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTPS2DQ" form="zmm, zmm, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvtfxpnt_round_adjustps_epu32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element conversion of packed single-precision (32-bit) floating-point elements in "v2" to packed 32-bit unsigned integer elements and performs an optional exponent adjust using "expadj", storing the results in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := v2[i+31:i]
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+	ESAC
+	dst[i+31:i] := Float32ToUInt32(dst[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTPS2UDQ" form="zmm, zmm, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvtfxpnt_round_adjustepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element conversion of packed 32-bit unsigned integer elements in "v2" to packed single-precision (32-bit) floating-point elements and performs an optional exponent adjust using "expadj", storing the results in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := UInt32ToFloat32(v2[i+31:i])
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTUDQ2PS" form="zmm, zmm, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_cvtfxpnt_round_adjustepu32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element conversion of packed 32-bit unsigned integer elements in "v2" to packed single-precision (32-bit) floating-point elements and performs an optional exponent adjust using "expadj", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := UInt32ToFloat32(v2[i+31:i])
+		CASE expadj OF
+		_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+		_MM_EXPADJ_4:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+		_MM_EXPADJ_5:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+		_MM_EXPADJ_8:	 dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+		_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+		_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+		_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+		_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTUDQ2PS" form="zmm {k}, zmm, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_exp223_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<description>Approximates the base-2 exponent of the packed single-precision (32-bit) floating-point elements in "v2" with eight bits for sign and magnitude and 24 bits for the fractional part. Results are stored in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := exp223(v2[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP223PS" form="zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_exp223_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<description>Approximates the base-2 exponent of the packed single-precision (32-bit) floating-point elements in "v2" with eight bits for sign and magnitude and 24 bits for the fractional part. Results are stored in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := exp223(v2[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VEXP223PS" form="zmm {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_fixupnan_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512i" varname="v3" etype="UI64"/>
+	<description>Fixes up NaN's from packed double-precision (64-bit) floating-point elements in "v1" and "v2", storing the results in "dst" and storing the quietized NaN's from "v1" in "v3".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := FixupNaNs(v1[i+63:i], v2[i+63:i])
+	v3[i+63:i] := QuietizeNaNs(v1[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPNANPD" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_fixupnan_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="__m512i" varname="v3" etype="UI64"/>
+	<description>Fixes up NaN's from packed double-precision (64-bit) floating-point elements in "v1" and "v2", storing the results in "dst" using writemask "k" (only elements whose corresponding mask bit is set are used in the computation). Quietized NaN's from "v1" are stored in "v3".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FixupNaNs(v1[i+63:i], v2[i+63:i])
+		v3[i+63:i] := QuietizeNaNs(v1[i+63:i])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPNANPD" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_fixupnan_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<description>Fixes up NaN's from packed single-precision (32-bit) floating-point elements in "v1" and "v2", storing the results in "dst" and storing the quietized NaN's from "v1" in "v3".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FixupNaNs(v1[i+31:i], v2[i+31:i])
+	v3[i+31:i] := QuietizeNaNs(v1[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPNANPS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_fixupnan_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v2" etype="FP32"/>
+	<parameter type="__m512i" varname="v3" etype="UI32"/>
+	<description>Fixes up NaN's from packed single-precision (32-bit) floating-point elements in "v1" and "v2", storing the results in "dst" using writemask "k" (only elements whose corresponding mask bit is set are used in the computation). Quietized NaN's from "v1" are stored in "v3".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FixupNaNs(v1[i+31:i], v2[i+31:i])
+		v3[i+31:i] := QuietizeNaNs(v1[i+31:i])
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFIXUPNANPS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="void const *" varname="mt" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN ZeroExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN SignExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN ZeroExtend32(MEM[addr + 2*offset])
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN SignExtend32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN 4
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == false
+		IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*32
+		dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN ZeroExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN SignExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN ZeroExtend32(MEM[addr + 2*offset])
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN SignExtend32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN 4
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*32
+			dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="void const *" varname="mt" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>
+DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN ZeroExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN SignExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN ZeroExtend32(MEM[addr + 2*offset])
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN SignExtend32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN 4
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+	loadOffset := loadOffset + 1
+	IF (mt + loadOffset * upSize) % 64 == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN ZeroExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN SignExtend32(MEM[addr + offset])
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN ZeroExtend32(MEM[addr + 2*offset])
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN SignExtend32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:
+		RETURN 4
+	_MM_UPCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_UPCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_UPCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	IF k[j]
+		i := j*32
+		dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+		loadOffset := loadOffset + 1
+		IF (mt + loadOffset * upSize) % 64 == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="void const *" varname="mt" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == false
+		IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*64
+		dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHQ" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*64
+			dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHQ" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="void const *" varname="mt" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>
+DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+	loadOffset := loadOffset + 1
+	IF (addr + loadOffset*upSize) % 64 == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLQ" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	IF k[j]
+		i := j*64
+		dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+		loadOffset := loadOffset + 1
+		IF (addr + loadOffset*upSize) % 64 == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLQ" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="void const *" varname="mt" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN Convert_FP16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_UINT8:
+		RETURN Convert_UInt8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_SINT8:
+		RETURN Convert_Int8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_UINT16:
+		RETURN Convert_UInt16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_SINT16:
+		RETURN Convert_Int16_To_FP32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN 4
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_UPCONV_PS_UINT8:
+		RETURN 1
+	_MM_UPCONV_PS_SINT8:
+		RETURN 1
+	_MM_UPCONV_PS_UINT16:
+		RETURN 2
+	_MM_UPCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == false
+		IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*32
+		dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN Convert_FP16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_UINT8:
+		RETURN Convert_UInt8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_SINT8:
+		RETURN Convert_Int8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_UINT16:
+		RETURN Convert_UInt16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_SINT16:
+		RETURN Convert_Int16_To_FP32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN 4
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_UPCONV_PS_UINT8:
+		RETURN 1
+	_MM_UPCONV_PS_SINT8:
+		RETURN 1
+	_MM_UPCONV_PS_UINT16:
+		RETURN 2
+	_MM_UPCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*32
+			dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="void const *" varname="mt" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN Convert_FP16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_UINT8:
+		RETURN Convert_UInt8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_SINT8:
+		RETURN Convert_Int8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_UINT16:
+		RETURN Convert_UInt16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_SINT16:
+		RETURN Convert_Int16_To_FP32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN 4
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_UPCONV_PS_UINT8:
+		RETURN 1
+	_MM_UPCONV_PS_SINT8:
+		RETURN 1
+	_MM_UPCONV_PS_UINT16:
+		RETURN 2
+	_MM_UPCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+	loadOffset := loadOffset + 1
+	IF (mt + loadOffset * upSize) % 64 == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN MEM[addr + 4*offset]
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN Convert_FP16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_UINT8:
+		RETURN Convert_UInt8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_SINT8:
+		RETURN Convert_Int8_To_FP32(MEM[addr + offset])
+	_MM_UPCONV_PS_UINT16:
+		RETURN Convert_UInt16_To_FP32(MEM[addr + 2*offset])
+	_MM_UPCONV_PS_SINT16:
+		RETURN Convert_Int16_To_FP32(MEM[addr + 2*offset])
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:
+		RETURN 4
+	_MM_UPCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_UPCONV_PS_UINT8:
+		RETURN 1
+	_MM_UPCONV_PS_SINT8:
+		RETURN 1
+	_MM_UPCONV_PS_UINT16:
+		RETURN 2
+	_MM_UPCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	IF k[j]
+		i := j*32
+		dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
+		loadOffset := loadOffset + 1
+		IF (mt + loadOffset * upSize) % 64 == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="void const *" varname="mt" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed double-precision (64-bit) floating-point values in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == false
+		IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*64
+		dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed double-precision (64-bit) floating-point values in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+upSize := UPCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF (addr + (loadOffset + 1)*upSize) % 64 == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*64
+			dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extloadunpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="void const *" varname="mt" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed double-precision (64-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal.</description>
+	<operation>
+DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+	loadOffset := loadOffset + 1
+	IF (mt + loadOffset * upSize) % 64 == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extloadunpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt, up-converted depending on the value of "conv", and expanded into packed double-precision (64-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE UPCONVERT(addr, offset, convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN MEM[addr + 8*offset]
+	ESAC
+}
+DEFINE UPCONVERTSIZE(convertTo) {
+	CASE conv OF
+	_MM_UPCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+dst[511:0] := src[511:0]
+loadOffset := 0
+upSize := UPCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	IF k[j]
+		i := j*64
+		dst[i+63:i] := UPCONVERT(addr, loadOffset, conv)
+		loadOffset := loadOffset + 1
+		IF (mt + loadOffset * upSize) % 64 == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorehi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI32"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 32-bit integer elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN 4
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*32
+		tmp := DOWNCONVERT(v1[i+31:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		4: MEM[storeAddr] := tmp[31:0]
+		2: MEM[storeAddr] := tmp[15:0]
+		1: MEM[storeAddr] := tmp[7:0]
+		ESAC
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorehi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 32-bit integer elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN 4
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*32
+			tmp := DOWNCONVERT(v1[i+31:i], conv)
+			storeAddr := addr + storeOffset * downSize
+			CASE downSize OF
+			4: MEM[storeAddr] := tmp[31:0]
+			2: MEM[storeAddr] := tmp[15:0]
+			1: MEM[storeAddr] := tmp[7:0]
+			ESAC
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorelo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI32"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 32-bit integer elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN 4
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	tmp := DOWNCONVERT(v1[i+31:i], conv)
+	storeAddr := addr + storeOffset * downSize
+	CASE downSize OF
+	4: MEM[storeAddr] := tmp[31:0]
+	2: MEM[storeAddr] := tmp[15:0]
+	1: MEM[storeAddr] := tmp[7:0]
+	ESAC
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset * downSize) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorelo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 32-bit integer elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal. Elements are written to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI32_NONE:
+		RETURN 4
+	_MM_DOWNCONV_EPI32_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_EPI32_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_EPI32_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	IF k[j]
+		i := j*32
+		tmp := DOWNCONVERT(v1[i+31:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		4: MEM[storeAddr] := tmp[31:0]
+		2: MEM[storeAddr] := tmp[15:0]
+		1: MEM[storeAddr] := tmp[7:0]
+		ESAC
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset * downSize) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorehi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI64"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 64-bit integer elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*64
+		tmp := DOWNCONVERT(v1[i+63:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		8: MEM[storeAddr] := tmp[63:0]
+		ESAC
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTOREHQ" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorehi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 64-bit integer elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*64
+			tmp := DOWNCONVERT(v1[i+63:i], conv)
+			storeAddr := addr + storeOffset * downSize
+			CASE downSize OF
+			8: MEM[storeAddr] := tmp[63:0]
+			ESAC
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTOREHQ" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorelo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI64"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 64-bit integer elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	tmp := DOWNCONVERT(v1[i+63:i], conv)
+	storeAddr := addr + storeOffset * downSize
+	CASE downSize OF
+	8: MEM[storeAddr] := tmp[63:0]
+	ESAC
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset * downSize) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELQ" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorelo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed 64-bit integer elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_EPI64_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	IF k[j]
+		i := j*64
+		tmp := DOWNCONVERT(v1[i+63:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		8: MEM[storeAddr] := tmp[63:0]
+		ESAC
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset * downSize) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELQ" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorehi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed single-precision (32-bit) floating-point elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN Convert_FP32_To_FP16(element[31:0])
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN 4
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*32
+		tmp := DOWNCONVERT(v1[i+31:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		4: MEM[storeAddr] := tmp[31:0]
+		2: MEM[storeAddr] := tmp[15:0]
+		1: MEM[storeAddr] := tmp[7:0]
+		ESAC
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTOREHPS" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorehi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed single-precision (32-bit) floating-point elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN Convert_FP32_To_FP16(element[31:0])
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN 4
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*32
+			tmp := DOWNCONVERT(v1[i+31:i], conv)
+			storeAddr := addr + storeOffset * downSize
+			CASE downSize OF
+			4: MEM[storeAddr] := tmp[31:0]
+			2: MEM[storeAddr] := tmp[15:0]
+			1: MEM[storeAddr] := tmp[7:0]
+			ESAC
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTOREHPS" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorelo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed single-precision (32-bit) floating-point elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN Convert_FP32_To_FP16(element[31:0])
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN 4
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	tmp := DOWNCONVERT(v1[i+31:i], conv)
+	storeAddr := addr + storeOffset * downSize
+	CASE downSize OF
+	4: MEM[storeAddr] := tmp[31:0]
+	2: MEM[storeAddr] := tmp[15:0]
+	1: MEM[storeAddr] := tmp[7:0]
+	ESAC
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset * downSize) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELPS" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorelo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed single-precision (32-bit) floating-point elements of "v1" into a byte/word/doubleword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN element[31:0]
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN Convert_FP32_To_FP16(element[31:0])
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN Truncate8(element[31:0])
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN Saturate8(element[31:0])
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN Truncate16(element[31:0])
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN Saturate16(element[31:0])
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PS_NONE:
+		RETURN 4
+	_MM_DOWNCONV_PS_FLOAT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_UINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_SINT8:
+		RETURN 1
+	_MM_DOWNCONV_PS_UINT16:
+		RETURN 2
+	_MM_DOWNCONV_PS_SINT16:
+		RETURN 2
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 15
+	IF k[j]
+		i := j*32
+		tmp := DOWNCONVERT(v1[i+31:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		4: MEM[storeAddr] := tmp[31:0]
+		2: MEM[storeAddr] := tmp[15:0]
+		1: MEM[storeAddr] := tmp[7:0]
+		ESAC
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset * downSize) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELPS" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorehi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*64
+		tmp := DOWNCONVERT(v1[i+63:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		8: MEM[storeAddr] := tmp[63:0]
+		ESAC
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTOREHPD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorehi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+foundNext64BytesBoundary := false
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (storeOffset + 1)*downSize) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*64
+			tmp := DOWNCONVERT(v1[i+63:i], conv)
+			storeAddr := addr + storeOffset * downSize
+			CASE downSize OF
+			8: MEM[storeAddr] := tmp[63:0]
+			ESAC
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTOREHPD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_extpackstorelo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	tmp := DOWNCONVERT(v1[i+63:i], conv)
+	storeAddr := addr + storeOffset * downSize
+	CASE downSize OF
+	8: MEM[storeAddr] := tmp[63:0]
+	ESAC
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset * downSize) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELPD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_extpackstorelo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mt" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE"/>
+	<description>Down-converts and stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream according to "conv" at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). "hint" indicates to the processor whether the data is non-temporal. Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE DOWNCONVERT(element, convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN element[63:0]
+	ESAC
+}
+DEFINE DOWNCONVERTSIZE(convertTo) {
+	CASE convertTo OF
+	_MM_DOWNCONV_PD_NONE:
+		RETURN 8
+	ESAC
+}
+storeOffset := 0
+downSize := DOWNCONVERTSIZE(conv)
+addr := mt
+FOR j := 0 to 7
+	IF k[j]
+		i := j*64
+		tmp := DOWNCONVERT(v1[i+63:i], conv)
+		storeAddr := addr + storeOffset * downSize
+		CASE downSize OF
+		8: MEM[storeAddr] := tmp[63:0]
+		ESAC
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset * downSize) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPACKSTORELPD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_i32loscatter_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed 64-bit integer elements located in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+	MEM[addr+63:addr] := a[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="m512, zmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_i32loscatter_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI32"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed 64-bit integer elements located in "a" and stores them in memory locations starting at location "base_addr" at packed 32-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements whose corresponding mask bit is not set are not written to memory).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*32
+	IF k[j]
+		addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+		MEM[addr+63:addr] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPSCATTERDQ" form="m512 {k}, zmm" xed="VPSCATTERDQ_MEMu64_MASKmskw_ZMMu64_AVX512_VL512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="void const*" varname="mt" etype="UI32" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64 and expands them into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src".</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (loadOffset + 1)*4) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*32
+		tmp := MEM[addr + loadOffset*4]
+		dst[i+31:i] := tmp[i+31:i]
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const *" varname="mt" etype="UI32" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64 and expands them into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (loadOffset + 1)*4) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*32
+			tmp := MEM[addr + loadOffset*4]
+			dst[i+31:i] := tmp[i+31:i]
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="void const*" varname="mt" etype="UI32" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt and expands them into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src".</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	tmp := MEM[addr + loadOffset*4]
+	dst[i+31:i] := tmp[i+31:i]
+	loadOffset := loadOffset + 1
+	IF (mt + loadOffset * 4) % 64 == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="UI32" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt and expands them into packed 32-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp := MEM[addr + loadOffset*4]
+		dst[i+31:i] := tmp[i+31:i]
+		loadOffset := loadOffset + 1
+		IF (mt + loadOffset * 4) % 64 == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="void const*" varname="mt" etype="UI64" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64 and expands them into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src".</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (loadOffset + 1)*8) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*64
+		tmp := MEM[addr + loadOffset*8]
+		dst[i+63:i] := tmp[i+63:i]
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHQ" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="UI64" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64 and expands them into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (loadOffset + 1)*8) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*64
+			tmp := MEM[addr + loadOffset*8]
+			dst[i+63:i] := tmp[i+63:i]
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHQ" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="void const*" varname="mt" etype="UI64" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt and expands them into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src".</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	tmp := MEM[addr + loadOffset*8]
+	dst[i+63:i] := tmp[i+63:i]
+	loadOffset := loadOffset + 1
+	IF ((addr + loadOffset*8) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLQ" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="UI64" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt and expands them into packed 64-bit integers in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp := MEM[addr + loadOffset*8]
+		dst[i+63:i] := tmp[i+63:i]
+		loadOffset := loadOffset + 1
+		IF ((addr + loadOffset*8) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLQ" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="void const*" varname="mt" etype="FP32" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64 and expands them into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src".</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == false
+		IF ((addr + (loadOffset + 1)*4) % 64) == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*32
+		tmp := MEM[addr + loadOffset*4]
+		dst[i+31:i] := tmp[i+31:i]
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="FP32" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the doubleword stream starting at element-aligned address mt-64 and expands them into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF ((addr + (loadOffset + 1)*4) % 64) == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*32
+			tmp := MEM[addr + loadOffset*4]
+			dst[i+31:i] := tmp[i+31:i]
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="void const*" varname="mt" etype="FP32" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the doubleword stream starting at element-aligned address mt and expands them into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src".</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	tmp := MEM[addr + loadOffset*4]
+	dst[i+31:i] := tmp[i+31:i]
+	loadOffset := loadOffset + 1
+	IF (mt + loadOffset * 4) % 64 == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="FP32" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the doubleword stream starting at element-aligned address mt and expands them into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		tmp := MEM[addr + loadOffset*4]
+		dst[i+31:i] := tmp[i+31:i]
+		loadOffset := loadOffset + 1
+		IF (mt + loadOffset * 4) % 64 == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="void const*" varname="mt" etype="FP64" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64 and expands them into packed double-precision (64-bit) floating-point values in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src".</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == false
+		IF (addr + (loadOffset + 1)*8) % 64 == 0
+			foundNext64BytesBoundary := true
+		FI
+	ELSE
+		i := j*64
+		tmp := MEM[addr + loadOffset*8]
+		dst[i+63:i] := tmp[i+63:i]
+	FI
+	loadOffset := loadOffset + 1
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="FP64" memwidth="512"/>
+	<description>Loads the high-64-byte-aligned portion of the quadword stream starting at element-aligned address mt-64 and expands them into packed double-precision (64-bit) floating-point values in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>dst[511:0] := src[511:0]
+loadOffset := 0
+foundNext64BytesBoundary := false
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == false
+			IF (addr + (loadOffset + 1)*8) % 64 == 0
+				foundNext64BytesBoundary := true
+			FI
+		ELSE
+			i := j*64
+			tmp := MEM[addr + loadOffset*8]
+			dst[i+63:i] := tmp[i+63:i]
+		FI
+		loadOffset := loadOffset + 1
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKHPD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_loadunpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="void const*" varname="mt" etype="FP64" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt and expands them into packed double-precision (64-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src".</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	tmp := MEM[addr + loadOffset*8]
+	dst[i+63:i] := tmp[i+63:i]
+	loadOffset := loadOffset + 1
+	IF ((addr + 8*loadOffset) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPD" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_loadunpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="void const*" varname="mt" etype="FP64" memwidth="512"/>
+	<description>Loads the low-64-byte-aligned portion of the quadword stream starting at element-aligned address mt and expands them into packed double-precision (64-bit) floating-point values in "dst". The initial values of "dst" are copied from "src". Only those converted quadwords that occur before the first 64-byte-aligned address following "mt" are loaded. Elements in the resulting vector that do not map to those quadwords are taken from "src". Elements are loaded from memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+dst[511:0] := src[511:0]
+loadOffset := 0
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		tmp := MEM[addr + loadOffset*8]
+		dst[i+63:i] := tmp[i+63:i]
+		loadOffset := loadOffset + 1
+		IF ((addr + 8*loadOffset) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOADUNPACKLPD" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorehi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI32" memwidth="512"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<description>Stores packed 32-bit integer elements of "v1" into a doubleword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == 0
+		IF ((addr + (storeOffset + 1)*4) % 64) == 0
+			foundNext64BytesBoundary := 1
+		FI
+	ELSE
+		i := j*32
+		MEM[addr + storeOffset*4] := v1[i+31:i]
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorehi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<description>Stores packed 32-bit integer elements of "v1" into a doubleword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == 0
+			IF ((addr + (storeOffset + 1)*4) % 64) == 0
+				foundNext64BytesBoundary := 1
+			FI
+		ELSE
+			i := j*32
+			MEM[addr + storeOffset*4] := v1[i+31:i]
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorelo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI32" memwidth="512"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<description>Stores packed 32-bit integer elements of "v1" into a doubleword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt").</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	MEM[addr + storeOffset*4] := v1[i+31:i]
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset*4) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorelo_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI32"/>
+	<description>Stores packed 32-bit integer elements of "v1" into a doubleword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 15
+	IF k[j]
+		i := j*32
+		MEM[addr + storeOffset*4] := v1[i+31:i]
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset*4) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorehi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI64" memwidth="512"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<description>Stores packed 64-bit integer elements of "v1" into a quadword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == 0
+		IF ((addr + (storeOffset + 1)*8) % 64) == 0
+			foundNext64BytesBoundary := 1
+		FI
+	ELSE
+		i := j*64
+		MEM[addr + storeOffset*8] := v1[i+63:i]
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHQ" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorehi_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<description>Stores packed 64-bit integer elements of "v1" into a quadword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == 0
+			IF ((addr + (storeOffset + 1)*8) % 64) == 0
+				foundNext64BytesBoundary := 1
+			FI
+		ELSE
+			i := j*64
+			MEM[addr + storeOffset*8] := v1[i+63:i]
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHQ" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorelo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI64" memwidth="512"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<description>Stores packed 64-bit integer elements of "v1" into a quadword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt").</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	MEM[addr + storeOffset*8] := v1[i+63:i]
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset*8) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELQ" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorelo_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="UI64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="v1" etype="UI64"/>
+	<description>Stores packed 64-bit integer elements of "v1" into a quadword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 7
+	IF k[j]
+		i := j*64
+		MEM[addr + storeOffset*8] := v1[i+63:i]
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset*8) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELQ" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorehi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<description>Stores packed single-precision (32-bit) floating-point elements of "v1" into a doubleword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 15
+	IF foundNext64BytesBoundary == 0
+		IF ((addr + (storeOffset + 1)*4) % 64) == 0
+			foundNext64BytesBoundary := 1
+		FI
+	ELSE
+		i := j*32
+		MEM[addr + storeOffset*4] := v1[i+31:i]
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHPS" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorehi_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<description>Stores packed single-precision (32-bit) floating-point elements of "v1" into a doubleword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 15
+	IF k[j]
+		IF foundNext64BytesBoundary == 0
+			IF ((addr + (storeOffset + 1)*4) % 64) == 0
+				foundNext64BytesBoundary := 1
+			FI
+		ELSE
+			i := j*32
+			MEM[addr + storeOffset*4] := v1[i+31:i]
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHPS" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorelo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP32" memwidth="512"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<description>Stores packed single-precision (32-bit) floating-point elements of "v1" into a doubleword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt").</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 15
+	i := j*32
+	MEM[addr + storeOffset*4] := v1[i+31:i]
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset*4) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELPS" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorelo_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP32" memwidth="512"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="v1" etype="FP32"/>
+	<description>Stores packed single-precision (32-bit) floating-point elements of "v1" into a doubleword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 15
+	IF k[j]
+		i := j*32
+		MEM[addr + storeOffset*4] := v1[i+31:i]
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset*4) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELPS" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorehi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<description>Stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 7
+	IF foundNext64BytesBoundary == 0
+		IF ((addr + (storeOffset + 1)*8) % 64) == 0
+			foundNext64BytesBoundary := 1
+		FI
+	ELSE
+		i := j*64
+		MEM[addr + storeOffset*8] := v1[i+63:i]
+	FI
+	storeOffset := storeOffset + 1
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHPD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorehi_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<description>Stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream at a logically mapped starting address (mt-64), storing the high-64-byte elements of that stream (those elements of the stream that map at or after the first 64-byte-aligned address following (mt-64)). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+foundNext64BytesBoundary := 0
+addr := mt-64
+FOR j := 0 to 7
+	IF k[j]
+		IF foundNext64BytesBoundary == 0
+			IF ((addr + (storeOffset + 1)*8) % 64) == 0
+				foundNext64BytesBoundary := 1
+			FI
+		ELSE
+			i := j*64
+			MEM[addr + storeOffset*8] := v1[i+63:i]
+		FI
+		storeOffset := storeOffset + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTOREHPD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_packstorelo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP64" memwidth="512"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<description>Stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt").</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 7
+	i := j*64
+	MEM[addr + storeOffset*8] := v1[i+63:i]
+	storeOffset := storeOffset + 1
+	IF ((addr + storeOffset*8) % 64) == 0
+		BREAK
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELPD" form="m512, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_packstorelo_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mt" etype="FP64" memwidth="512"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v1" etype="FP64"/>
+	<description>Stores packed double-precision (64-bit) floating-point elements of "v1" into a quadword stream at a logically mapped starting address "mt", storing the low-64-byte elements of that stream (those elements of the stream that map before the first 64-byte-aligned address following "mt"). Elements are stored to memory according to element selector "k" (elements are skipped when the corresponding mask bit is not set).</description>
+	<operation>
+storeOffset := 0
+addr := mt
+FOR j := 0 to 7
+	IF k[j]
+		i := j*64
+		MEM[addr + storeOffset*8] := v1[i+63:i]
+		storeOffset := storeOffset + 1
+		IF ((addr + storeOffset*8) % 64) == 0
+			BREAK
+		FI
+	FI
+ENDFOR
+	</operation>
+	<instruction name="VPACKSTORELPD" form="m512 {k}, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_countbits_32">
+	<CPUID>KNCNI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="r1" etype="UI32"/>
+	<description>Counts the number of set bits in 32-bit unsigned integer "r1", returning the results in "dst".</description>
+	<operation>dst[31:0] := PopCount(r1[31:0])
+	</operation>
+	<instruction name="POPCNT" form="r32, r32" xed="POPCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_countbits_64">
+	<CPUID>KNCNI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="r1" etype="UI64"/>
+	<description>Counts the number of set bits in 64-bit unsigned integer "r1", returning the results in "dst".</description>
+	<operation>dst[63:0] := PopCount(r1[63:0])
+	</operation>
+	<instruction name="POPCNT" form="r64, r64" xed="POPCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kmovlhb">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Inserts the low byte of mask "k2" into the high byte of "dst", and copies the low byte of "k1" to the low byte of "dst".</description>
+	<operation>
+dst[7:0] := k1[7:0]
+dst[15:8] := k2[7:0]
+	</operation>
+	<instruction name="KMERGE2L1L" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvtfxpnt_roundpd_epi32lo">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs an element-by-element conversion of elements in packed double-precision (64-bit) floating-point vector "v2" to 32-bit integer elements, storing them in the lower half of "dst". The elements in the upper half of "dst" are set to 0.
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	k := j*32
+	dst[k+31:k] := Convert_FP64_To_Int32(v2[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTPD2DQ" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_cvtfxpnt_roundpd_epi32lo">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512i" varname="dst" etype="SI32"/>
+	<parameter type="__m512i" varname="src" etype="SI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="v2" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Performs an element-by-element conversion of elements in packed double-precision (64-bit) floating-point vector "v2" to 32-bit integer elements, storing them in the lower half of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). The elements in the upper half of "dst" are set to 0.
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	l := j*32
+	IF k[j]
+		dst[l+31:l] := Convert_FP64_To_Int32(v2[i+63:i])
+	ELSE
+		dst[l+31:l] := src[l+31:l]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTPD2DQ" form="zmm {k}, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_cvtfxpnt_round_adjustepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="v2" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element conversion of packed 32-bit integer elements in "v2" to packed single-precision (32-bit) floating-point elements and performs an optional exponent adjust using "expadj", storing the results in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := Int32ToFloat32(v2[i+31:i])
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VCVTFXPNTDQ2PS" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_log2ae23_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a" with absolute error of 2^(-23) and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOG2PS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_log2ae23_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a" with absolute error of 2^(-23) and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0)
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VLOG2PS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_fmadd_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Multiply packed 32-bit integer elements in "a" and "b", add the intermediate result to packed elements in "c" and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD231D" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_fmadd_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<description>Multiply packed 32-bit integer elements in "a" and "b", add the intermediate result to packed elements in "c" and store the results in "dst" using writemask "k" (elements are copied from "a" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD231D" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask3_fmadd_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="__m512i" varname="c" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<description>Multiply packed 32-bit integer elements in "a" and "b", add the intermediate result to packed elements in "c" and store the results in "dst" using writemask "k" (elements are copied from "c" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
+	ELSE
+		dst[i+31:i] := c[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD231D" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_fmadd233_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply packed 32-bit integer elements in each 4-element set of "a" by element 1 of the corresponding 4-element set from "b", add the intermediate result to element 0 of the corresponding 4-element set from "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	base := (j &amp; ~0x3) * 32
+	scale[31:0] := b[base+63:base+32]
+	bias[31:0]  := b[base+31:base]
+	dst[i+31:i] := (a[i+31:i] * scale[31:0]) + bias[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD233D" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_fmadd233_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Multiply packed 32-bit integer elements in each 4-element set of "a" by element 1 of the corresponding 4-element set from "b", add the intermediate result to element 0 of the corresponding 4-element set from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		base := (j &amp; ~0x3) * 32
+		scale[31:0] := b[base+63:base+32]
+		bias[31:0]  := b[base+31:base]
+		dst[i+31:i] := (a[i+31:i] * scale[31:0]) + bias[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMADD233D" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_fmadd233_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in each 4-element set of "a" by element 1 of the corresponding 4-element set from "b", add the intermediate result to element 0 of the corresponding 4-element set from "b", and store the results in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	base := (j &amp; ~0x3) * 32
+	scale[31:0] := b[base+63:base+32]
+	bias[31:0]  := b[base+31:base]
+	dst[i+31:i] := (a[i+31:i] * scale[31:0]) + bias[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD233PS" form="zmm, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_fmadd233_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in each 4-element set of "a" by element 1 of the corresponding 4-element set from "b", add the intermediate result to element 0 of the corresponding 4-element set from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		base := (j &amp; ~0x3) * 32
+		scale[31:0] := b[base+63:base+32]
+		bias[31:0]  := b[base+31:base]
+		dst[i+31:i] := (a[i+31:i] * scale[31:0]) + bias[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD233PS" form="zmm {k}, zmm, zmm"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_maxabs_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the maximum of the absolute elements of each pair of corresponding elements of packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FpMax(ABS(a[i+31:i]), ABS(b[i+31:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXABSPS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_maxabs_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the maximum of the absolute elements of each pair of corresponding elements of packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FpMax(ABS(a[i+31:i]), ABS(b[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXABSPS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_gmax_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the maximum of each pair of corresponding elements in packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FpMax(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXPS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_gmax_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the maximum of each pair of corresponding elements of packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FpMax(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXPS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_gmaxabs_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the maximum of the absolute elements of each pair of corresponding elements of packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FpMax(ABS(a[i+31:i]), ABS(b[i+31:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXABSPS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_gmaxabs_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the maximum of the absolute elements of each pair of corresponding elements of packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FpMax(ABS(a[i+31:i]), ABS(b[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXABSPS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_gmax_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Determines the maximum of each pair of corresponding elements in packed double-precision (64-bit) floating-point elements in "a" and "b", storing the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := FpMax(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXPD" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_gmax_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Determines the maximum of each pair of corresponding elements of packed double-precision (64-bit) floating-point elements in "a" and "b", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FpMax(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMAXPD" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_gmin_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the minimum of each pair of corresponding elements in packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := FpMin(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMINPS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_gmin_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Determines the minimum of each pair of corresponding elements of packed single-precision (32-bit) floating-point elements in "a" and "b", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := FpMin(a[i+31:i], b[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMINPS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_gmin_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Determines the minimum of each pair of corresponding elements in packed double-precision (64-bit) floating-point elements in "a" and "b", storing the results in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := FpMin(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMINPD" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_gmin_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="__m512d" varname="b" etype="FP64"/>
+	<description>Determines the minimum of each pair of corresponding elements of packed double-precision (64-bit) floating-point elements in "a" and "b", storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := FpMin(a[i+63:i], b[i+63:i])
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VGMINPD" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mulhi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Performs element-by-element multiplication between packed 32-bit integer elements in "a" and "b" and stores the high 32 bits of each result into "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) &gt;&gt; 32
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHD" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_mulhi_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Performs element-by-element multiplication between packed 32-bit integer elements in "a" and "b" and stores the high 32 bits of each result into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) &gt;&gt; 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHD" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mulhi_epu32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Performs element-by-element multiplication between packed unsigned 32-bit integer elements in "a" and "b" and stores the high 32 bits of each result into "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) &gt;&gt; 32
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHUD" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_mulhi_epu32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Performs element-by-element multiplication between packed unsigned 32-bit integer elements in "a" and "b" and stores the high 32 bits of each result into "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) &gt;&gt; 32
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPMULHUD" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_permute4f128_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Permutes 128-bit blocks of the packed 32-bit integer vector "a" using constant "imm8". The results are stored in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control)  {
+	CASE control[1:0] OF
+	0: tmp[127:0] := src[127:0]
+	1: tmp[127:0] := src[255:128]
+	2: tmp[127:0] := src[383:256]
+	3: tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+FOR j := 0 to 3
+	i := j*128
+	n := j*2
+	dst[i+127:i] := SELECT4(a[511:0], imm8[n+1:n])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMF32X4" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_permute4f128_epi32">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Permutes 128-bit blocks of the packed 32-bit integer vector "a" using constant "imm8". The results are stored in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control)  {
+	CASE control[1:0] OF
+	0: tmp[127:0] := src[127:0]
+	1: tmp[127:0] := src[255:128]
+	2: tmp[127:0] := src[383:256]
+	3: tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp[511:0] := 0
+FOR j := 0 to 3
+	i := j*128
+	n := j*2
+	tmp[i+127:i] := SELECT4(a[511:0], imm8[n+1:n])
+ENDFOR
+FOR j := 0 to 15
+	IF k[j]
+		dst[j*32+31:j*32] := tmp[j*32+31:j*32]
+	ELSE
+		dst[j*32+31:j*32] := src[j*32+31:j*32]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMF32X4" form="zmm {k}, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_rcp23_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Approximates the reciprocals of packed single-precision (32-bit) floating-point elements in "a" to 23 bits of precision, storing the results in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP23PS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_rcp23_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Approximates the reciprocals of packed single-precision (32-bit) floating-point elements in "a" to 23 bits of precision, storing the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := (1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRCP23PS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value using "expadj" and in the direction of "rounding", and store the results as packed single-precision floating-point elements in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ROUND(a[i+31:i])
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VROUNDPS" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Convert</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value using "expadj" and in the direction of "rounding", and store the results as packed single-precision floating-point elements in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ROUND(a[i+31:i])
+		CASE expadj OF
+		_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+		_MM_EXPADJ_4:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+		_MM_EXPADJ_5:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+		_MM_EXPADJ_8:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+		_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+		_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+		_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+		_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VROUNDPS" form="zmm {k}, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_roundfxpnt_adjust_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element rounding of packed single-precision (32-bit) floating-point elements in "a" using "expadj" and in the direction of "rounding" and stores results in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := ROUND(a[i+31:i])
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDFXPNTPS" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_roundfxpnt_adjust_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI32"/>
+	<description>Performs element-by-element rounding of packed single-precision (32-bit) floating-point elements in "a" using "expadj" and in the direction of "rounding" and stores results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := ROUND(a[i+31:i])
+		CASE expadj OF
+		_MM_EXPADJ_NONE: dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 0)
+		_MM_EXPADJ_4:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 4)
+		_MM_EXPADJ_5:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 5)
+		_MM_EXPADJ_8:	dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 8)
+		_MM_EXPADJ_16:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 16)
+		_MM_EXPADJ_24:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 24)
+		_MM_EXPADJ_31:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 31)
+		_MM_EXPADJ_32:   dst[i+31:i] := dst[i+31:i] * (2 &lt;&lt; 32)
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDFXPNTPS" form="zmm {k}, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_roundfxpnt_adjust_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI64"/>
+	<description>Performs element-by-element rounding of packed double-precision (64-bit) floating-point elements in "a" using "expadj" and in the direction of "rounding" and stores results in "dst".
+	[round_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	dst[i+63:i] := ROUND(a[i+63:i])
+	CASE expadj OF
+	_MM_EXPADJ_NONE: dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 0)
+	_MM_EXPADJ_4:	dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 4)
+	_MM_EXPADJ_5:	dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 5)
+	_MM_EXPADJ_8:	dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 8)
+	_MM_EXPADJ_16:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 16)
+	_MM_EXPADJ_24:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 24)
+	_MM_EXPADJ_31:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 31)
+	_MM_EXPADJ_32:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 32)
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDFXPNTPD" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_roundfxpnt_adjust_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<parameter type="_MM_EXP_ADJ_ENUM" varname="expadj" etype="UI64"/>
+	<description>Performs element-by-element rounding of packed double-precision (64-bit) floating-point elements in "a" using "expadj" and in the direction of "rounding" and stores results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
+	[round_note]</description>
+	<operation>FOR j := 0 to 7
+	i := j*64
+	IF k[j]
+		dst[i+63:i] := ROUND(a[i+63:i])
+		CASE expadj OF
+		_MM_EXPADJ_NONE: dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 0)
+		_MM_EXPADJ_4:	dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 4)
+		_MM_EXPADJ_5:	dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 5)
+		_MM_EXPADJ_8:	dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 8)
+		_MM_EXPADJ_16:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 16)
+		_MM_EXPADJ_24:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 24)
+		_MM_EXPADJ_31:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 31)
+		_MM_EXPADJ_32:   dst[i+63:i] := dst[i+63:i] * (2 &lt;&lt; 32)
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRNDFXPNTPD" form="zmm {k}, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_rsqrt23_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Calculates the reciprocal square root of packed single-precision (32-bit) floating-point elements in "a" to 23 bits of accuracy and stores the result in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := Sqrt(1.0 / a[i+31:i])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT23PS" form="zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_rsqrt23_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Calculates the reciprocal square root of packed single-precision (32-bit) floating-point elements in "a" to 23 bits of accuracy and stores the result in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := Sqrt(1.0 / a[i+31:i])
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VRSQRT23PS" form="zmm {k}, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_scale_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Scales each single-precision (32-bit) floating-point element in "a" by multiplying it by 2**exponent, where the exponent is the corresponding 32-bit integer element in "b", storing results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] * POW(2.0, FP32(b[i+31:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEPS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_scale_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<description>Scales each single-precision (32-bit) floating-point element in "a" by multiplying it by 2**exponent, where the exponent is the corresponding 32-bit integer element in "b", storing results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * POW(2.0, FP32(b[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEPS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_scale_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scales each single-precision (32-bit) floating-point element in "a" by multiplying it by 2**exponent, where the exponent is the corresponding 32-bit integer element in "b", storing results in "dst". Intermediate elements are rounded using "rounding".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	dst[i+31:i] := a[i+31:i] * POW(2.0,FP32(b[i+31:i]))
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEPS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_scale_round_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512i" varname="b" etype="UI32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Scales each single-precision (32-bit) floating-point element in "a" by multiplying it by 2**exp, where the exp is the corresponding 32-bit integer element in "b", storing results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). Results are rounded using constant "rounding".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := a[i+31:i] * POW(2.0, FP32(b[i+31:i]))
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VSCALEPS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_reduce_gmin_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Determines the minimum element of the packed single-precision (32-bit) floating-point elements stored in "a" and stores the result in "dst".</description>
+	<operation>min := a[31:0]
+FOR j := 1 to 15
+	i := j*32
+	min := FpMin(min, a[i+31:i])
+ENDFOR
+dst := min
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_reduce_gmin_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Determines the minimum element of the packed single-precision (32-bit) floating-point elements stored in "a" and stores the result in "dst". Bitmask "k" is used to exclude certain elements (elements are ignored when the corresponding mask bit is not set).</description>
+	<operation>min := a[31:0]
+FOR j := 1 to 15
+	i := j*32
+	IF k[j]
+		min := FpMin(min, a[i+31:i])
+	ELSE
+		CONTINUE
+	FI
+ENDFOR
+dst := min
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_reduce_gmin_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Determines the minimum element of the packed double-precision (64-bit) floating-point elements stored in "a" and stores the result in "dst".</description>
+	<operation>min := a[63:0]
+FOR j := 1 to 7
+	i := j*64
+	min := FpMin(min, a[i+63:i])
+ENDFOR
+dst := min
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_reduce_gmin_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Determines the minimum element of the packed double-precision (64-bit) floating-point elements stored in "a" and stores the result in "dst". Bitmask "k" is used to exclude certain elements (elements are ignored when the corresponding mask bit is not set).</description>
+	<operation>min := a[63:0]
+FOR j := 1 to 7
+	i := j*64
+	IF k[j]
+		min := FpMin(min, a[i+63:i])
+	ELSE
+		CONTINUE
+	FI
+ENDFOR
+dst := min
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_reduce_gmax_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Determines the maximum element of the packed single-precision (32-bit) floating-point elements stored in "a" and stores the result in "dst".</description>
+	<operation>max := a[31:0]
+FOR j := 1 to 15
+	i := j*32
+	max := FpMax(max, a[i+31:i])
+ENDFOR
+dst := max
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_reduce_gmax_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<description>Determines the maximum element of the packed single-precision (32-bit) floating-point elements stored in "a" and stores the result in "dst". Bitmask "k" is used to exclude certain elements (elements are ignored when the corresponding mask bit is not set).</description>
+	<operation>max := a[31:0]
+FOR j := 1 to 15
+	i := j*32
+	IF k[j]
+		max := FpMax(max, a[i+31:i])
+	ELSE
+		CONTINUE
+	FI
+ENDFOR
+dst := max
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_reduce_gmax_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Determines the maximum element of the packed double-precision (64-bit) floating-point elements stored in "a" and stores the result in "dst".</description>
+	<operation>max := a[63:0]
+FOR j := 1 to 7
+	i := j*64
+	max := FpMax(max, a[i+63:i])
+ENDFOR
+dst := max
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_reduce_gmax_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Special Math Functions</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<description>Determines the maximum element of the packed double-precision (64-bit) floating-point elements stored in "a" and stores the result in "dst". Bitmask "k" is used to exclude certain elements (elements are ignored when the corresponding mask bit is not set).</description>
+	<operation>max := a[63:0]
+FOR j := 1 to 7
+	i := j*64
+	IF k[j]
+		max := FpMax(max, a[i+63:i])
+	ELSE
+		CONTINUE
+	FI
+ENDFOR
+dst := max
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_tzcnti_32">
+	<CPUID>KNCNI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="SI32"/>
+	<parameter type="unsigned int" varname="x" etype="UI32"/>
+	<description>Count the number of trailing zero bits in unsigned 32-bit integer "x" starting at bit "a", and return that count in "dst".</description>
+	<operation>
+tmp := a
+IF tmp &lt; 0
+	tmp := 0
+FI
+dst := 0
+IF tmp &gt; 31
+	dst := 32
+ELSE
+	DO WHILE ((tmp &lt; 32) AND x[tmp] == 0)
+		tmp := tmp + 1
+		dst := dst + 1
+	OD
+FI
+	</operation>
+	<instruction name="TZCNTI" form="r32, r32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_tzcnti_64">
+	<CPUID>KNCNI</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="SI64"/>
+	<parameter type="unsigned __int64" varname="x" etype="UI64"/>
+	<description>Count the number of trailing zero bits in unsigned 64-bit integer "x" starting at bit "a", and return that count in "dst".</description>
+	<operation>
+tmp := a
+IF tmp &lt; 0
+	tmp := 0
+FI
+dst := 0
+IF tmp &gt; 63
+	dst := 64
+ELSE
+	DO WHILE ((tmp &lt; 64) AND x[tmp] == 0)
+		tmp := tmp + 1
+		dst := dst + 1
+	OD
+FI
+	</operation>
+	<instruction name="TZCNTI" form="r64, r64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_delay_32">
+	<CPUID>KNCNI</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="r1" etype="UI32"/>
+	<description>Stalls a thread without blocking other threads for 32-bit unsigned integer "r1" clock cycles.</description>
+	<operation>BlockThread(r1)
+	</operation>
+	<instruction name="DELAY" form="r32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_delay_64">
+	<CPUID>KNCNI</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned __int64" varname="r1" etype="UI64"/>
+	<description>Stalls a thread without blocking other threads for 64-bit unsigned integer "r1" clock cycles.</description>
+	<operation>BlockThread(r1)
+	</operation>
+	<instruction name="DELAY" form="r64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_spflt_32">
+	<CPUID>KNCNI</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="r1" etype="UI32"/>
+	<description>Set performance monitoring filtering mask to 32-bit unsigned integer "r1".</description>
+	<operation>SetPerfMonMask(r1[31:0])
+	</operation>
+	<instruction name="SPFLT" form="r32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_spflt_64">
+	<CPUID>KNCNI</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned __int64" varname="r1" etype="UI64"/>
+	<description>Set performance monitoring filtering mask to 64-bit unsigned integer "r1".</description>
+	<operation>SetPerfMonMask(r1[63:0])
+	</operation>
+	<instruction name="SPFLT" form="r64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm_clevict">
+	<CPUID>KNCNI</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="ptr"/>
+	<parameter type="int" varname="level" etype="UI32"/>
+	<description>Evicts the cache line containing the address "ptr" from cache level "level" (can be either 0 or 1).</description>
+	<operation>CacheLineEvict(ptr, level)
+	</operation>
+	<instruction name="CLEVICT0" form="m8"/>
+	<instruction name="CLEVICT1" form="m8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kandnr">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Performs a bitwise AND operation between NOT of "k2" and "k1", storing the result in "dst".</description>
+	<operation>dst[15:0] := NOT(k2[15:0]) &amp; k1[15:0]
+	</operation>
+	<instruction name="KANDNR" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kswapb">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Moves high byte from "k2" to low byte of "k1", and moves low byte of "k2" to high byte of "k1".</description>
+	<operation>
+tmp[7:0] := k2[15:8]
+k2[15:8] := k1[7:0]
+k1[7:0]  := tmp[7:0]
+tmp[7:0] := k2[7:0]
+k2[7:0]  := k1[15:8]
+k1[15:8] := tmp[7:0]
+	</operation>
+	<instruction name="KMERGE2L1H" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kortestz">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Performs bitwise OR between "k1" and "k2", storing the result in "dst". ZF flag is set if "dst" is 0.</description>
+	<operation>dst[15:0] := k1[15:0] | k2[15:0]
+IF dst == 0
+	SetZF()
+FI
+	</operation>
+	<instruction name="KORTEST" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kortestc">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Performs bitwise OR between "k1" and "k2", storing the result in "dst". CF flag is set if "dst" consists of all 1's.</description>
+	<operation>dst[15:0] := k1[15:0] | k2[15:0]
+IF PopCount(dst[15:0]) == 16
+	SetCF()
+FI
+	</operation>
+	<instruction name="KORTEST" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask2int">
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<description>Converts bit mask "k1" into an integer value, storing the results in "dst".</description>
+	<operation>
+dst := ZeroExtend32(k1)
+	</operation>
+	<instruction name="KMOV" form="r32, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_int2mask">
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="int" varname="mask" etype="UI16"/>
+	<description>Converts integer "mask" into bitmask, storing the result in "dst".</description>
+	<operation>
+dst := mask[15:0]
+	</operation>
+	<instruction name="KMOV" form="k, r32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kconcathi_64">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Packs masks "k1" and "k2" into the high 32 bits of "dst". The rest of "dst" is set to 0.</description>
+	<operation>
+dst[63:48] := k1[15:0]
+dst[47:32] := k2[15:0]
+dst[31:0]  := 0
+	</operation>
+	<instruction name="KCONCATH" form="r64, k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kconcatlo_64">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Packs masks "k1" and "k2" into the low 32 bits of "dst". The rest of "dst" is set to 0.</description>
+	<operation>
+dst[31:16] := k1[15:0]
+dst[15:0]  := k2[15:0]
+dst[63:32] := 0
+	</operation>
+	<instruction name="KCONCATL" form="r64, k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kextract_64">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="dst" etype="MASK"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="b" etype="UI32"/>
+	<description>Extracts 16-bit value "b" from 64-bit integer "a", storing the result in "dst".</description>
+	<operation>
+CASE b[1:0] OF
+0: dst[15:0] := a[63:48]
+1: dst[15:0] := a[47:32]
+2: dst[15:0] := a[31:16]
+3: dst[15:0] := a[15:0]
+ESAC
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="KEXTRACT" form="k, r64, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_fmadd233_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in each 4-element set of "a" by element 1 of the corresponding 4-element set from "b", add the intermediate result to element 0 of the corresponding 4-element set from "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	base := (j &amp; ~0x3) * 32
+	scale[31:0] := b[base+63:base+32]
+	bias[31:0]  := b[base+31:base]
+	dst[i+31:i] := (a[i+31:i] * scale[31:0]) + bias[31:0]
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD233PS" form="zmm, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_fmadd233_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="__m512" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in each 4-element set of "a" by element 1 of the corresponding 4-element set from "b", add the intermediate result to element 0 of the corresponding 4-element set from "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		base := (j &amp; ~0x3) * 32
+		scale[31:0] := b[base+63:base+32]
+		bias[31:0]  := b[base+31:base]
+		dst[i+31:i] := (a[i+31:i] * scale[31:0]) + bias[31:0]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VFMADD233PS" form="zmm {k}, zmm, m512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extgather_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 single-precision (32-bit) memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 32-bit integer elements and stores them in "dst". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_EPI32_NONE:   dst[i+31:i] := MEM[addr+31:addr]
+	_MM_UPCONV_EPI32_UINT8:  dst[i+31:i] := ZeroExtend32(MEM[addr+7:addr])
+	_MM_UPCONV_EPI32_SINT8:  dst[i+31:i] := SignExtend32(MEM[addr+7:addr])
+	_MM_UPCONV_EPI32_UINT16: dst[i+31:i] := ZeroExtend32(MEM[addr+15:addr])
+	_MM_UPCONV_EPI32_SINT16: dst[i+31:i] := SignExtend32(MEM[addr+15:addr])
+	ESAC
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extgather_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI32"/>
+	<parameter type="_MM_UPCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 single-precision (32-bit) memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 32-bit integer elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_EPI32_NONE:   dst[i+31:i] := MEM[addr+31:addr]
+		_MM_UPCONV_EPI32_UINT8:  dst[i+31:i] := ZeroExtend32(MEM[addr+7:addr])
+		_MM_UPCONV_EPI32_SINT8:  dst[i+31:i] := SignExtend32(MEM[addr+7:addr])
+		_MM_UPCONV_EPI32_UINT16: dst[i+31:i] := ZeroExtend32(MEM[addr+15:addr])
+		_MM_UPCONV_EPI32_SINT16: dst[i+31:i] := SignExtend32(MEM[addr+15:addr])
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extgather_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 double-precision (64-bit) memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 64-bit integer elements and stores them in "dst". "hint" indicates to the processor whether the load is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_EPI64_NONE: dst[i+63:i] := MEM[addr+63:addr]
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extgather_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI64"/>
+	<parameter type="__m512i" varname="src" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const*" varname="base_addr" etype="UI64"/>
+	<parameter type="_MM_UPCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 double-precision (64-bit) memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 64-bit integer elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the load is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_EPI64_NONE: dst[i+63:i] := MEM[addr+63:addr]
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extgather_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to single-precision (32-bit) floating-point elements and stores them in the lower half of "dst". "hint" indicates to the processor whether the load is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_PS_NONE:    dst[i+31:i] := MEM[addr+31:addr]
+	_MM_UPCONV_PS_FLOAT16: dst[i+31:i] := Convert_FP16_To_FP32(MEM[addr+15:addr])
+	_MM_UPCONV_PS_UINT8:   dst[i+31:i] := Convert_UInt8_To_FP32(MEM[addr+7:addr])
+	_MM_UPCONV_PS_SINT8:   dst[i+31:i] := Convert_Int8_To_FP32(MEM[addr+7:addr])
+	_MM_UPCONV_PS_UINT16:  dst[i+31:i] := Convert_UInt16_To_FP32(MEM[addr+15:addr])
+	_MM_UPCONV_PS_SINT16:  dst[i+31:i] := Convert_Int16_To_FP32(MEM[addr+15:addr])
+	ESAC
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extgather_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="_MM_UPCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to single-precision (32-bit) floating-point elements and stores them in the lower half of "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the load is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_PS_NONE:    dst[i+31:i] := MEM[addr+31:addr]
+		_MM_UPCONV_PS_FLOAT16: dst[i+31:i] := Convert_FP16_To_FP32(MEM[addr+15:addr])
+		_MM_UPCONV_PS_UINT8:   dst[i+31:i] := Convert_UInt8_To_FP32(MEM[addr+7:addr])
+		_MM_UPCONV_PS_SINT8:   dst[i+31:i] := Convert_Int8_To_FP32(MEM[addr+7:addr])
+		_MM_UPCONV_PS_UINT16:  dst[i+31:i] := Convert_UInt16_To_FP32(MEM[addr+15:addr])
+		_MM_UPCONV_PS_SINT16:  dst[i+31:i] := Convert_Int16_To_FP32(MEM[addr+15:addr])
+		ESAC
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extgather_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 double-precision (64-bit) floating-point elements stored in memory starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 64-bit floating-point elements and stores them in "dst". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_UPCONV_PD_NONE: dst[i+63:i] := MEM[addr+63:addr]
+	ESAC
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extgather_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512d" varname="dst" etype="FP64"/>
+	<parameter type="__m512d" varname="src" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP64"/>
+	<parameter type="_MM_UPCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_UPCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Up-converts 8 double-precision (64-bit) floating-point elements stored in memory starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using "conv" to 64-bit floating-point elements and stores them in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_UPCONV_PD_NONE: dst[i+63:i] := MEM[addr+63:addr]
+		ESAC
+	ELSE
+		dst[i+63:i] := src[i+63:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extscatter_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed single-precision (32-bit) floating-point elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_PS_NONE:    MEM[addr+31:addr] := a[i+31:i]
+	_MM_DOWNCONV_PS_FLOAT16: MEM[addr+15:addr] := Convert_FP32_To_FP16(a[i+31:i])
+	_MM_DOWNCONV_PS_UINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_UInt8(a[i+31:i])
+	_MM_DOWNCONV_PS_SINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_Int8(a[i+31:i])
+	_MM_DOWNCONV_PS_UINT16:  MEM[addr+15:addr] := Convert_FP32_To_UInt16(a[i+31:i])
+	_MM_DOWNCONV_PS_SINT16:  MEM[addr+15:addr] := Convert_FP32_To_Int16(a[i+31:i])
+	ESAC
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extscatter_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_DOWNCONV_PS_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PS"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed single-precision (32-bit) floating-point elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". Elements are only written when the corresponding mask bit is set in "k"; otherwise, elements are unchanged in memory. "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_PS_NONE:    MEM[addr+31:addr] := a[i+31:i]
+		_MM_DOWNCONV_PS_FLOAT16: MEM[addr+15:addr] := Convert_FP32_To_FP16(a[i+31:i])
+		_MM_DOWNCONV_PS_UINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_UInt8(a[i+31:i])
+		_MM_DOWNCONV_PS_SINT8:   MEM[addr+ 7:addr] := Convert_FP32_To_Int8(a[i+31:i])
+		_MM_DOWNCONV_PS_UINT16:  MEM[addr+15:addr] := Convert_FP32_To_UInt16(a[i+31:i])
+		_MM_DOWNCONV_PS_SINT16:  MEM[addr+15:addr] := Convert_FP32_To_Int16(a[i+31:i])
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extscatter_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed double-precision (64-bit) floating-point elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_PD_NONE: MEM[addr+63:addr] := a[i+63:i]
+	ESAC
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extscatter_pd">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512d" varname="a" etype="FP64"/>
+	<parameter type="_MM_DOWNCONV_PD_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_PD"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed double-precision (64-bit) floating-point elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". Elements are written to memory using writemask "k" (elements are not stored to memory when the corresponding mask bit is not set; the memory location is left unchanged). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_PD_NONE: MEM[addr+63:addr] := a[i+63:i]
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extscatter_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts the low 8 packed 32-bit integer elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_EPI32_NONE:   MEM[addr+31:addr] := a[i+31:i]
+	_MM_DOWNCONV_EPI32_UINT8:  MEM[addr+ 7:addr] := Truncate8(a[i+31:i])
+	_MM_DOWNCONV_EPI32_SINT8:  MEM[addr+ 7:addr] := Saturate8(a[i+31:i])
+	_MM_DOWNCONV_EPI32_UINT16: MEM[addr+15:addr] := Truncate16(a[i+31:i])
+	_MM_DOWNCONV_EPI32_SINT16: MEM[addr+15:addr] := Saturate16(a[i+31:i])
+	ESAC
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extscatter_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI32"/>
+	<parameter type="_MM_DOWNCONV_EPI32_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts the low 8 packed 32-bit integer elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". Elements are written to memory using writemask "k" (elements are only written when the corresponding mask bit is set; otherwise, the memory location is left unchanged). "hint" indicates to the processor whether the data is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_EPI32_NONE:   MEM[addr+31:addr] := a[i+31:i]
+		_MM_DOWNCONV_EPI32_UINT8:  MEM[addr+ 7:addr] := Truncate8(a[i+31:i])
+		_MM_DOWNCONV_EPI32_SINT8:  MEM[addr+ 7:addr] := Saturate8(a[i+31:i])
+		_MM_DOWNCONV_EPI32_UINT16: MEM[addr+15:addr] := Truncate16(a[i+31:i])
+		_MM_DOWNCONV_EPI32_SINT16: MEM[addr+15:addr] := Saturate16(a[i+31:i])
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64extscatter_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI64"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed 64-bit integer elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". "hint" indicates to the processor whether the load is non-temporal.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	CASE conv OF
+	_MM_DOWNCONV_EPI64_NONE: MEM[addr+63:addr] := a[i+63:i]
+	ESAC
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64extscatter_epi64">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI64"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="UI64"/>
+	<parameter type="_MM_DOWNCONV_EPI64_ENUM" varname="conv" etype="IMM" immtype="_MM_DOWNCONV_EPI64"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<parameter type="int" varname="hint" etype="UI32" hint="TRUE" immtype="_MM_HINT_EXT"/>
+	<description>Down-converts 8 packed 64-bit integer elements in "a" using "conv" and stores them in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale". Only those elements whose corresponding mask bit is set in writemask "k" are written to memory.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*64
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	
+	IF k[j]
+		CASE conv OF
+		_MM_DOWNCONV_EPI64_NONE: MEM[addr+63:addr] := a[i+63:i]
+		ESAC
+	FI
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_permute4f128_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Permutes 128-bit blocks of the packed single-precision (32-bit) floating-point elements in "a" using constant "imm8". The results are stored in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control)  {
+	CASE control[1:0] OF
+	0: tmp[127:0] := src[127:0]
+	1: tmp[127:0] := src[255:128]
+	2: tmp[127:0] := src[383:256]
+	3: tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+FOR j := 0 to 3
+	i := j*128
+	n := j*2
+	dst[i+127:i] := SELECT4(a[511:0], imm8[n+1:n])
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMF32X4" form="zmm, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_mask_permute4f128_ps">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Swizzle</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="_MM_PERM_ENUM" varname="imm8" etype="IMM" immtype="_MM_PERM"/>
+	<description>Permutes 128-bit blocks of the packed single-precision (32-bit) floating-point elements in "a" using constant "imm8". The results are stored in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+DEFINE SELECT4(src, control)  {
+	CASE control[1:0] OF
+	0: tmp[127:0] := src[127:0]
+	1: tmp[127:0] := src[255:128]
+	2: tmp[127:0] := src[383:256]
+	3: tmp[127:0] := src[511:384]
+	ESAC
+	RETURN tmp[127:0]
+}
+tmp[511:0] := 0
+FOR j := 0 to 3
+	i := j*128
+	n := j*2
+	tmp[i+127:i] := SELECT4(a[511:0], imm8[n+1:n])
+ENDFOR
+FOR j := 0 to 15
+	i := j*32
+	IF k[j]
+		dst[i+31:i] := tmp[i+31:i]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPERMF32X4" form="zmm {k}, m512, imm8"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64gather_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 32-bit integer memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" to "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64gather_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512i" varname="dst" etype="UI32"/>
+	<parameter type="__m512i" varname="src" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="UI32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 32-bit integer memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64gather_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 single-precision (32-bit) floating-point memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" to "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	dst[i+31:i] := MEM[addr+31:addr]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64gather_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Load</category>
+	<return type="__m512" varname="dst" etype="FP32"/>
+	<parameter type="__m512" varname="src" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="void const *" varname="base_addr" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Loads 8 single-precision (32-bit) floating-point memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" to "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		dst[i+31:i] := MEM[addr+31:addr]
+	ELSE
+		dst[i+31:i] := src[i+31:i]
+	FI
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64scatter_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed single-precision (32-bit) floating-point elements in "a" in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64scatter_pslo">
+	<type>Floating Point</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="FP32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512" varname="a" etype="FP32"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed single-precision (32-bit) floating-point elements in "a" in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements are only written to memory when the corresponding mask bit is set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI	
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_i64scatter_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI32"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed 32-bit integer elements in "a" in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+	MEM[addr+31:addr] := a[i+31:i]
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" sequence="TRUE" name="_mm512_mask_i64scatter_epi32lo">
+	<type>Integer</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="base_addr" etype="UI32"/>
+	<parameter type="__mmask8" varname="k" etype="MASK"/>
+	<parameter type="__m512i" varname="vindex" etype="SI64"/>
+	<parameter type="__m512i" varname="a" etype="M512"/>
+	<parameter type="int" varname="scale" etype="IMM" immtype="_MM_INDEX_SCALE"/>
+	<description>Stores 8 packed 32-bit integer elements in "a" in memory locations starting at location "base_addr" at packed 64-bit integer indices stored in "vindex" scaled by "scale" using writemask "k" (elements are only written to memory when the corresponding mask bit is set).</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*32
+	m := j*64
+	IF k[j]
+		addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
+		MEM[addr+31:addr] := a[i+31:i]
+	FI	
+ENDFOR
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kmerge2l1h">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Move the high element from "k1" to the low element of "k1", and insert the low element of "k2" into the high element of "k1".</description>
+	<operation>
+tmp[7:0] := k1[15:8]
+k1[15:8] := k2[7:0]
+k1[7:0]  := tmp[7:0]
+	</operation>
+	<instruction name="KMERGE2L1H" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="KNC" name="_mm512_kmerge2l1l">
+	<type>Mask</type>
+	<CPUID>KNCNI</CPUID>
+	<category>Mask</category>
+	<return type="__mmask16" varname="k" etype="MASK"/>
+	<parameter type="__mmask16" varname="k1" etype="MASK"/>
+	<parameter type="__mmask16" varname="k2" etype="MASK"/>
+	<description>Insert the low element of "k2" into the high element of "k1".</description>
+	<operation>
+k1[15:8] := k2[7:0]
+	</operation>
+	<instruction name="KMERGE2L1L" form="k, k"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_lzcnt_u32">
+	<type>Integer</type>
+	<CPUID>LZCNT</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Count the number of leading zero bits in unsigned 32-bit integer "a", and return that count in "dst".</description>
+	<operation>
+tmp := 31
+dst := 0
+DO WHILE (tmp &gt;= 0 AND a[tmp] == 0)
+	tmp := tmp - 1
+	dst := dst + 1
+OD
+	</operation>
+	<instruction name="LZCNT" form="r32, r32" xed="LZCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_lzcnt_u64">
+	<type>Integer</type>
+	<CPUID>LZCNT</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Count the number of leading zero bits in unsigned 64-bit integer "a", and return that count in "dst".</description>
+	<operation>
+tmp := 63
+dst := 0
+DO WHILE (tmp &gt;= 0 AND a[tmp] == 0)
+	tmp := tmp - 1
+	dst := dst + 1
+OD
+	</operation>
+	<instruction name="LZCNT" form="r64, r64" xed="LZCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_from_int64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Copy 64-bit integer "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="mm, r64" xed="MOVQ_MMXq_GPR64"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_to_int64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="a" etype="FP32"/>
+	<description>Copy 64-bit integer "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="r64, mm" xed="MOVQ_GPR64_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_empty">
+	<CPUID>MMX</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures.</description>
+	<instruction name="EMMS" xed="EMMS"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_from_int">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Copy 32-bit integer "a" to the lower element of "dst", and zero the upper element of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[63:32] := 0
+	</operation>
+	<instruction name="MOVD" form="mm, r32" xed="MOVD_MMXq_GPR32"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_to_int">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m64" varname="a" etype="FP32"/>
+	<description>Copy the lower 32-bit integer in "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="MOVD" form="r32, mm" xed="MOVD_GPR32_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_packsswb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="SI8"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := Saturate8(a[15:0])
+dst[15:8] := Saturate8(a[31:16])
+dst[23:16] := Saturate8(a[47:32])
+dst[31:24] := Saturate8(a[63:48])
+dst[39:32] := Saturate8(b[15:0])
+dst[47:40] := Saturate8(b[31:16])
+dst[55:48] := Saturate8(b[47:32])
+dst[63:56] := Saturate8(b[63:48])
+	</operation>
+	<instruction name="PACKSSWB" form="mm, mm" xed="PACKSSWB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_packssdw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:0])
+dst[31:16] := Saturate16(a[63:32])
+dst[47:32] := Saturate16(b[31:0])
+dst[63:48] := Saturate16(b[63:32])
+	</operation>
+	<instruction name="PACKSSDW" form="mm, mm" xed="PACKSSDW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_packuswb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := SaturateU8(a[15:0])
+dst[15:8] := SaturateU8(a[31:16])
+dst[23:16] := SaturateU8(a[47:32])
+dst[31:24] := SaturateU8(a[63:48])
+dst[39:32] := SaturateU8(b[15:0])
+dst[47:40] := SaturateU8(b[31:16])
+dst[55:48] := SaturateU8(b[47:32])
+dst[63:56] := SaturateU8(b[63:48])
+	</operation>
+	<instruction name="PACKUSWB" form="mm, mm" xed="PACKUSWB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_punpckhbw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) {
+	dst[7:0] := src1[39:32]
+	dst[15:8] := src2[39:32] 
+	dst[23:16] := src1[47:40]
+	dst[31:24] := src2[47:40]
+	dst[39:32] := src1[55:48]
+	dst[47:40] := src2[55:48]
+	dst[55:48] := src1[63:56]
+	dst[63:56] := src2[63:56]
+	RETURN dst[63:0]
+}
+dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKHBW" form="mm, mm" xed="PUNPCKHBW_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_punpckhwd">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) {
+	dst[15:0] := src1[47:32]
+	dst[31:16] := src2[47:32]
+	dst[47:32] := src1[63:48]
+	dst[63:48] := src2[63:48]
+	RETURN dst[63:0]
+}
+dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKHWD" form="mm, mm" xed="PUNPCKHWD_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_punpckhdq">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32]
+dst[63:32] := b[63:32]
+	</operation>
+	<instruction name="PUNPCKHDQ" form="mm, mm" xed="PUNPCKHDQ_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_punpcklbw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	RETURN dst[63:0]	
+}
+dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKLBW" form="mm, mm" xed="PUNPCKLBW_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_punpcklwd">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	RETURN dst[63:0]	
+}
+dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKLWD" form="mm, mm" xed="PUNPCKLWD_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_punpckldq">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[63:32] := b[31:0]
+	</operation>
+	<instruction name="PUNPCKLDQ" form="mm, mm" xed="PUNPCKLDQ_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := a[i+7:i] + b[i+7:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDB" form="mm, mm" xed="PADDB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := a[i+15:i] + b[i+15:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDW" form="mm, mm" xed="PADDW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddd">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDD" form="mm, mm" xed="PADDD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddsb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDSB" form="mm, mm" xed="PADDSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddsw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDSW" form="mm, mm" xed="PADDSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddusb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDUSB" form="mm, mm" xed="PADDUSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_paddusw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDUSW" form="mm, mm" xed="PADDUSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubb">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := a[i+7:i] - b[i+7:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBB" form="mm, mm" xed="PSUBB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := a[i+15:i] - b[i+15:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBW" form="mm, mm" xed="PSUBW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBD" form="mm, mm" xed="PSUBD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubsb">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBSB" form="mm, mm" xed="PSUBSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubsw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PSUBSW" form="mm, mm" xed="PSUBSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubusb">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBUSB" form="mm, mm" xed="PSUBUSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psubusw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBUSW" form="mm, mm" xed="PSUBUSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pmaddwd">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMADDWD" form="mm, mm" xed="PMADDWD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pmulhw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+	</operation>
+	<instruction name="PMULHW" form="mm, mm" xed="PMULHW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pmullw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[15:0]
+ENDFOR
+	</operation>
+	<instruction name="PMULLW" form="mm, mm" xed="PMULLW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psllw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLW" form="mm, mm" xed="PSLLW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psllwi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLW" form="mm, imm8" xed="PSLLW_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pslld">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLD" form="mm, mm" xed="PSLLD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pslldi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLD" form="mm, imm8" xed="PSLLD_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psllq">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF count[63:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &lt;&lt; count[63:0])
+FI
+	</operation>
+	<instruction name="PSLLQ" form="mm, mm" xed="PSLLQ_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psllqi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF imm8[7:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &lt;&lt; imm8[7:0])
+FI
+	</operation>
+	<instruction name="PSLLQ" form="mm, imm8" xed="PSLLQ_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psraw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAW" form="mm, mm" xed="PSRAW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrawi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAW" form="mm, imm8" xed="PSRAW_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrad">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAD" form="mm, mm" xed="PSRAD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psradi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAD" form="mm, imm8" xed="PSRAD_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrlw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLW" form="mm, mm" xed="PSRLW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrlwi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLW" form="mm, imm8" xed="PSRLW_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrld">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLD" form="mm, mm" xed="PSRLD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrldi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLD" form="mm, imm8" xed="PSRLD_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrlq">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF count[63:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &gt;&gt; count[63:0])
+FI
+	</operation>
+	<instruction name="PSRLQ" form="mm, mm" xed="PSRLQ_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_psrlqi">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF imm8[7:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &gt;&gt; imm8[7:0])
+FI
+	</operation>
+	<instruction name="PSRLQ" form="mm, imm8" xed="PSRLQ_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pand">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] AND b[63:0])
+	</operation>
+	<instruction name="PAND" form="mm, mm" xed="PAND_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pandn">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := ((NOT a[63:0]) AND b[63:0])
+	</operation>
+	<instruction name="PANDN" form="mm, mm" xed="PANDN_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_por">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] OR b[63:0])
+	</operation>
+	<instruction name="POR" form="mm, mm" xed="POR_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pxor">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] XOR b[63:0])
+	</operation>
+	<instruction name="PXOR" form="mm, mm" xed="PXOR_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pcmpeqb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQB" form="mm, mm" xed="PCMPEQB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pcmpeqw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQW" form="mm, mm" xed="PCMPEQW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pcmpeqd">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQD" form="mm, mm" xed="PCMPEQD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pcmpgtb">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Compare packed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] &gt; b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTB" form="mm, mm" xed="PCMPGTB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pcmpgtw">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Compare packed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] &gt; b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTW" form="mm, mm" xed="PCMPGTW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_m_pcmpgtd">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI64"/>
+	<parameter type="__m64" varname="b" etype="SI64"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &gt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTD" form="mm, mm" xed="PCMPGTD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_empty">
+	<CPUID>MMX</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures.</description>
+	<instruction name="EMMS" xed="EMMS"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_add_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := a[i+7:i] + b[i+7:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDB" form="mm, mm" xed="PADDB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_add_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := a[i+15:i] + b[i+15:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDW" form="mm, mm" xed="PADDW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_add_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDD" form="mm, mm" xed="PADDD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_adds_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI8"/>
+	<parameter type="__m64" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDSB" form="mm, mm" xed="PADDSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_adds_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDSW" form="mm, mm" xed="PADDSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_adds_pu8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDUSB" form="mm, mm" xed="PADDUSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_adds_pu16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDUSW" form="mm, mm" xed="PADDUSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sub_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := a[i+7:i] - b[i+7:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBB" form="mm, mm" xed="PSUBB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sub_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := a[i+15:i] - b[i+15:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBW" form="mm, mm" xed="PSUBW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sub_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBD" form="mm, mm" xed="PSUBD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_subs_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI8"/>
+	<parameter type="__m64" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBSB" form="mm, mm" xed="PSUBSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_subs_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PSUBSW" form="mm, mm" xed="PSUBSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_subs_pu8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBUSB" form="mm, mm" xed="PSUBUSB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_subs_pu16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBUSW" form="mm, mm" xed="PSUBUSW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_madd_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMADDWD" form="mm, mm" xed="PMADDWD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_mulhi_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+	</operation>
+	<instruction name="PMULHW" form="mm, mm" xed="PMULHW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_mullo_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[15:0]
+ENDFOR
+	</operation>
+	<instruction name="PMULLW" form="mm, mm" xed="PMULLW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sll_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLW" form="mm, mm" xed="PSLLW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_slli_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLW" form="mm, imm8" xed="PSLLW_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sll_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLD" form="mm, mm" xed="PSLLD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_slli_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLD" form="mm, imm8" xed="PSLLD_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sll_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift 64-bit integer "a" left by "count" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF count[63:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &lt;&lt; count[63:0])
+FI
+	</operation>
+	<instruction name="PSLLQ" form="mm, mm" xed="PSLLQ_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_slli_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 64-bit integer "a" left by "imm8" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF imm8[7:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &lt;&lt; imm8[7:0])
+FI
+	</operation>
+	<instruction name="PSLLQ" form="mm, imm8" xed="PSLLQ_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sra_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAW" form="mm, mm" xed="PSRAW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srai_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAW" form="mm, imm8" xed="PSRAW_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_sra_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAD" form="mm, mm" xed="PSRAD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srai_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAD" form="mm, imm8" xed="PSRAD_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srl_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLW" form="mm, mm" xed="PSRLW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srli_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLW" form="mm, imm8" xed="PSRLW_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srl_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLD" form="mm, mm" xed="PSRLD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srli_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLD" form="mm, imm8" xed="PSRLD_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srl_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="count" etype="UI64"/>
+	<description>Shift 64-bit integer "a" right by "count" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF count[63:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &gt;&gt; count[63:0])
+FI
+	</operation>
+	<instruction name="PSRLQ" form="mm, mm" xed="PSRLQ_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_srli_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Shift</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift 64-bit integer "a" right by "imm8" while shifting in zeros, and store the result in "dst".</description>
+	<operation>
+IF imm8[7:0] &gt; 63
+	dst[63:0] := 0
+ELSE
+	dst[63:0] := ZeroExtend64(a[63:0] &gt;&gt; imm8[7:0])
+FI
+	</operation>
+	<instruction name="PSRLQ" form="mm, imm8" xed="PSRLQ_MMXq_IMMb"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_and_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise AND of 64 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] AND b[63:0])
+	</operation>
+	<instruction name="PAND" form="mm, mm" xed="PAND_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_andnot_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise NOT of 64 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := ((NOT a[63:0]) AND b[63:0])
+	</operation>
+	<instruction name="PANDN" form="mm, mm" xed="PANDN_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_or_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise OR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] OR b[63:0])
+	</operation>
+	<instruction name="POR" form="mm, mm" xed="POR_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_xor_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Logical</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Compute the bitwise XOR of 64 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] XOR b[63:0])
+	</operation>
+	<instruction name="PXOR" form="mm, mm" xed="PXOR_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cmpeq_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQB" form="mm, mm" xed="PCMPEQB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cmpeq_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQW" form="mm, mm" xed="PCMPEQW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cmpeq_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQD" form="mm, mm" xed="PCMPEQD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cmpgt_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI8"/>
+	<parameter type="__m64" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] &gt; b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTB" form="mm, mm" xed="PCMPGTB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cmpgt_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] &gt; b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTW" form="mm, mm" xed="PCMPGTW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cmpgt_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Compare</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &gt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTD" form="mm, mm" xed="PCMPGTD_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cvtsi32_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Copy 32-bit integer "a" to the lower element of "dst", and zero the upper element of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[63:32] := 0
+	</operation>
+	<instruction name="MOVD" form="mm, r32" xed="MOVD_MMXq_GPR32"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cvtsi64_si32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m64" varname="a" etype="FP32"/>
+	<description>Copy the lower 32-bit integer in "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="MOVD" form="r32, mm" xed="MOVD_GPR32_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cvtm64_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="a" etype="FP32"/>
+	<description>Copy 64-bit integer "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="r64, mm" xed="MOVQ_GPR64_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_cvtsi64_m64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Copy 64-bit integer "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="mm, r64" xed="MOVQ_MMXq_GPR64"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_setzero_si64">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m64 with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="PXOR" form="mm, mm" xed="PXOR_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_set_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_set_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[15:0] := e0
+dst[31:16] := e1
+dst[47:32] := e2
+dst[63:48] := e3
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_set_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[7:0] := e0
+dst[15:8] := e1
+dst[23:16] := e2
+dst[31:24] := e3
+dst[39:32] := e4
+dst[47:40] := e5
+dst[55:48] := e6
+dst[63:56] := e7
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_set1_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_set1_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_set1_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_setr_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e1
+dst[63:32] := e0
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_setr_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[15:0] := e3
+dst[31:16] := e2
+dst[47:32] := e1
+dst[63:48] := e0
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" sequence="TRUE" name="_mm_setr_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Set</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[7:0] := e7
+dst[15:8] := e6
+dst[23:16] := e5
+dst[31:24] := e4
+dst[39:32] := e3
+dst[47:40] := e2
+dst[55:48] := e1
+dst[63:56] := e0
+	</operation>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_packs_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="SI8"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := Saturate8(a[15:0])
+dst[15:8] := Saturate8(a[31:16])
+dst[23:16] := Saturate8(a[47:32])
+dst[31:24] := Saturate8(a[63:48])
+dst[39:32] := Saturate8(b[15:0])
+dst[47:40] := Saturate8(b[31:16])
+dst[55:48] := Saturate8(b[47:32])
+dst[63:56] := Saturate8(b[63:48])
+	</operation>
+	<instruction name="PACKSSWB" form="mm, mm" xed="PACKSSWB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_packs_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:0])
+dst[31:16] := Saturate16(a[63:32])
+dst[47:32] := Saturate16(b[31:0])
+dst[63:48] := Saturate16(b[63:32])
+	</operation>
+	<instruction name="PACKSSDW" form="mm, mm" xed="PACKSSDW_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_packs_pu16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := SaturateU8(a[15:0])
+dst[15:8] := SaturateU8(a[31:16])
+dst[23:16] := SaturateU8(a[47:32])
+dst[31:24] := SaturateU8(a[63:48])
+dst[39:32] := SaturateU8(b[15:0])
+dst[47:40] := SaturateU8(b[31:16])
+dst[55:48] := SaturateU8(b[47:32])
+dst[63:56] := SaturateU8(b[63:48])
+	</operation>
+	<instruction name="PACKUSWB" form="mm, mm" xed="PACKUSWB_MMXq_MMXq"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_unpackhi_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]) {
+	dst[7:0] := src1[39:32]
+	dst[15:8] := src2[39:32] 
+	dst[23:16] := src1[47:40]
+	dst[31:24] := src2[47:40]
+	dst[39:32] := src1[55:48]
+	dst[47:40] := src2[55:48]
+	dst[55:48] := src1[63:56]
+	dst[63:56] := src2[63:56]
+	RETURN dst[63:0]	
+}
+dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKHBW" form="mm, mm" xed="PUNPCKHBW_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_unpackhi_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]) {
+	dst[15:0] := src1[47:32]
+	dst[31:16] := src2[47:32]
+	dst[47:32] := src1[63:48]
+	dst[63:48] := src2[63:48]
+	RETURN dst[63:0]
+}
+dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKHWD" form="mm, mm" xed="PUNPCKHWD_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_unpackhi_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32]
+dst[63:32] := b[63:32]
+	</operation>
+	<instruction name="PUNPCKHDQ" form="mm, mm" xed="PUNPCKHDQ_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_unpacklo_pi8">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[63:0], src2[63:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	RETURN dst[63:0]	
+}
+dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKLBW" form="mm, mm" xed="PUNPCKLBW_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_unpacklo_pi16">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[63:0], src2[63:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	RETURN dst[63:0]	
+}
+dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0])
+	</operation>
+	<instruction name="PUNPCKLWD" form="mm, mm" xed="PUNPCKLWD_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="MMX" name="_mm_unpacklo_pi32">
+	<type>Integer</type>
+	<CPUID>MMX</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[63:32] := b[31:0]
+	</operation>
+	<instruction name="PUNPCKLDQ" form="mm, mm" xed="PUNPCKLDQ_MMXq_MMXd"/>
+	<header>mmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_monitor">
+	<CPUID>MONITOR</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void const*" varname="p"/>
+	<parameter type="unsigned" varname="extensions" etype="UI32"/>
+	<parameter type="unsigned" varname="hints" etype="UI32"/>
+	<description>Arm address monitoring hardware using the address specified in "p". A store to an address within the specified address range triggers the monitoring hardware. Specify optional extensions in "extensions", and optional hints in "hints".</description>
+	<instruction name="MONITOR" xed="MONITOR"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_mwait">
+	<CPUID>MONITOR</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned" varname="extensions" etype="UI32"/>
+	<parameter type="unsigned" varname="hints" etype="UI32"/>
+	<description>Hint to the processor that it can enter an implementation-dependent-optimized state while waiting for an event or store operation to the address range specified by MONITOR.</description>
+	<instruction name="MWAIT" xed="MWAIT"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_loadbe_i16">
+	<CPUID>MOVBE</CPUID>
+	<category>Load</category>
+	<return type="short" varname="dst" etype="UI16"/>
+	<parameter type="void const *" varname="ptr" etype="UI16" memwidth="16"/>
+	<description>Load 16 bits from memory, perform a byte swap operation, and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*8
+	dst[i+7:i] := MEM[ptr+15-i:ptr+8-i]
+ENDFOR
+	</operation>
+	<instruction name="MOVBE" form="r16, m16" xed="MOVBE_GPRv_MEMv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_loadbe_i32">
+	<CPUID>MOVBE</CPUID>
+	<category>Load</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="void const *" varname="ptr" etype="UI32" memwidth="32"/>
+	<description>Load 32 bits from memory, perform a byte swap operation, and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*8
+	dst[i+7:i] := MEM[ptr+31-i:ptr+24-i]
+ENDFOR
+	</operation>
+	<instruction name="MOVBE" form="r32, m32" xed="MOVBE_GPRv_MEMv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_loadbe_i64">
+	<CPUID>MOVBE</CPUID>
+	<category>Load</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="void const *" varname="ptr" etype="UI64" memwidth="64"/>
+	<description>Load 64 bits from memory, perform a byte swap operation, and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := MEM[ptr+63-i:ptr+56-i]
+ENDFOR
+	</operation>
+	<instruction name="MOVBE" form="r64, m64" xed="MOVBE_GPRv_MEMv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_storebe_i16">
+	<CPUID>MOVBE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="ptr" etype="UI16" memwidth="16"/>
+	<parameter type="short" varname="data" etype="UI16"/>
+	<description>Perform a byte swap operation of the 16 bits in "data", and store the results to memory.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*8
+	MEM[ptr+i+7:ptr+i] := data[15-i:8-i]
+ENDFOR
+	</operation>
+	<instruction name="MOVBE" form="m16, r16" xed="MOVBE_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_storebe_i32">
+	<CPUID>MOVBE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="ptr" etype="UI32" memwidth="32"/>
+	<parameter type="int" varname="data" etype="UI32"/>
+	<description>Perform a byte swap operation of the 32 bits in "data", and store the results to memory.</description>
+	<operation>
+addr := MEM[ptr]
+FOR j := 0 to 3
+	i := j*8
+	MEM[ptr+i+7:ptr+i] := data[31-i:24-i]
+ENDFOR
+	</operation>
+	<instruction name="MOVBE" form="m32, r32" xed="MOVBE_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_storebe_i64">
+	<CPUID>MOVBE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void *" varname="ptr" etype="UI64" memwidth="64"/>
+	<parameter type="__int64" varname="data" etype="UI64"/>
+	<description>Perform a byte swap operation of the 64 bits in "data", and store the results to memory.</description>
+	<operation>
+addr := MEM[ptr]
+FOR j := 0 to 7
+	i := j*8
+	MEM[ptr+i+7:ptr+i] := data[63-i:56-i]
+ENDFOR
+	</operation>
+	<instruction name="MOVBE" form="m64, r64" xed="MOVBE_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_movdir64b">
+	<CPUID>MOVDIR64B</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="dst" etype="M512" memwidth="512"/>
+	<parameter type="const void*" varname="src" etype="M512" memwidth="512"/>
+	<description>Move 64-byte (512-bit) value using direct store from source memory address "src" to destination memory address "dst".</description>
+	<operation>
+MEM[dst+511:dst] := MEM[src+511:src]
+	</operation>
+	<instruction name="MOVDIR64B" form="r64, m512" xed="MOVDIR64B_GPRa_MEM"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_directstoreu_u64">
+	<CPUID>MOVDIRI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="dst" etype="UI64" memwidth="64"/>
+	<parameter type="unsigned __int64" varname="val" etype="UI64"/>
+	<description>Store 64-bit integer from "val" into memory using direct store.</description>
+	<operation>
+MEM[dst+63:dst] := val[63:0]
+	</operation>
+	<instruction name="MOVDIRI" form="m64, r64" xed="MOVDIRI_MEMu64_GPR64u64"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_directstoreu_u32">
+	<CPUID>MOVDIRI</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="dst" etype="UI32" memwidth="32"/>
+	<parameter type="unsigned int" varname="val" etype="UI32"/>
+	<description>Store 32-bit integer from "val" into memory using direct store.</description>
+	<operation>
+MEM[dst+31:dst] := val[31:0]
+	</operation>
+	<instruction name="MOVDIRI" form="m32, r32" xed="MOVDIRI_MEMu32_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bnd_set_ptr_bounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void *"/>
+	<parameter type="const void *" varname="srcmem"/>
+	<parameter type="size_t" varname="size" etype="UI64"/>
+	<description>Make a pointer with the value of "srcmem" and bounds set to ["srcmem", "srcmem" + "size" - 1], and store the result in "dst".</description>
+	<operation>dst := srcmem
+dst.LB := srcmem
+dst.UB := srcmem + size - 1
+	</operation>
+	<instruction name="BNDMK" form="bnd, m32" xed="BNDMK_BND_AGEN"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_bnd_narrow_ptr_bounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void *"/>
+	<parameter type="const void *" varname="q"/>
+	<parameter type="const void *" varname="r"/>
+	<parameter type="size_t" varname="size" etype="UI64"/>
+	<description>Narrow the bounds for pointer "q" to the intersection of the bounds of "r" and the bounds ["q", "q" + "size" - 1], and store the result in "dst".</description>
+	<operation>dst := q
+IF r.LB &gt; (q + size - 1) OR r.UB &lt; q
+	dst.LB := 1
+	dst.UB := 0
+ELSE
+	dst.LB := MAX(r.LB, q)
+	dst.UB := MIN(r.UB, (q + size - 1))
+FI
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_bnd_copy_ptr_bounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void *"/>
+	<parameter type="const void *" varname="q"/>
+	<parameter type="const void *" varname="r"/>
+	<description>Make a pointer with the value of "q" and bounds set to the bounds of "r" (i.e. copy the bounds of "r" to pointer "q"), and store the result in "dst".</description>
+	<operation>dst := q
+dst.LB := r.LB
+dst.UB := r.UB
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_bnd_init_ptr_bounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void *"/>
+	<parameter type="const void *" varname="q"/>
+	<description>Make a pointer with the value of "q" and open bounds, which allow the pointer to access the entire virtual address space, and store the result in "dst".</description>
+	<operation>dst := q
+dst.LB := 0
+dst.UB := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bnd_store_ptr_bounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="const void **" varname="ptr_addr"/>
+	<parameter type="const void *" varname="ptr_val"/>
+	<description>Stores the bounds of "ptr_val" pointer in memory at address "ptr_addr".</description>
+	<operation>MEM[ptr_addr].LB := ptr_val.LB
+MEM[ptr_addr].UB := ptr_val.UB
+	</operation>
+	<instruction name="BNDSTX" form="mib, bnd" xed="BNDSTX_MEMbnd64_BND"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bnd_chk_ptr_lbounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="q"/>
+	<description>Checks if "q" is within its lower bound, and throws a #BR if not.</description>
+	<operation>IF q &lt; q.LB
+	#BR
+FI
+	</operation>
+	<instruction name="BNDCL" form="bnd, m64" xed="BNDCL_BND_AGEN"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bnd_chk_ptr_ubounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="q"/>
+	<description>Checks if "q" is within its upper bound, and throws a #BR if not.</description>
+	<operation>IF q &gt; q.UB
+	#BR
+FI
+	</operation>
+	<instruction name="BNDCU" form="bnd, m64" xed="BNDCU_BND_AGEN"/>
+	<instruction name="BNDCN" form="bnd, m64" xed="BNDCN_BND_AGEN"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bnd_chk_ptr_bounds">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="q"/>
+	<parameter type="size_t" varname="size" etype="UI64"/>
+	<description>Checks if ["q", "q" + "size" - 1] is within the lower and upper bounds of "q" and throws a #BR if not.</description>
+	<operation>IF (q + size - 1) &lt; q.LB OR (q + size - 1) &gt; q.UB
+	#BR
+FI
+	</operation>
+	<instruction name="BNDCU" form="bnd, m32" xed="BNDCU_BND_AGEN"/>
+	<instruction name="BNDCN" form="bnd, m32" xed="BNDCN_BND_AGEN"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_bnd_get_ptr_lbound">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="const void *"/>
+	<parameter type="const void *" varname="q"/>
+	<description>Return the lower bound of "q".</description>
+	<operation>dst := q.LB
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_bnd_get_ptr_ubound">
+	<CPUID>MPX</CPUID>
+	<category>Miscellaneous</category>
+	<return type="const void *"/>
+	<parameter type="const void *" varname="q"/>
+	<description>Return the upper bound of "q".</description>
+	<operation>dst := q.UB
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bit_scan_forward">
+	<type>Integer</type>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Set "dst" to the index of the lowest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined.</description>
+	<operation>
+tmp := 0
+IF a == 0
+	// dst is undefined
+ELSE
+	DO WHILE ((tmp &lt; 32) AND a[tmp] == 0)
+		tmp := tmp + 1
+	OD
+FI
+dst := tmp
+	</operation>
+	<instruction name="BSF" form="r32, r32" xed="BSF_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bit_scan_reverse">
+	<type>Integer</type>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Set "dst" to the index of the highest set bit in 32-bit integer "a". If no bits are set in "a" then "dst" is undefined.</description>
+	<operation>
+tmp := 31
+IF a == 0
+	// dst is undefined
+ELSE
+	DO WHILE ((tmp &gt; 0) AND a[tmp] == 0)
+		tmp := tmp - 1
+	OD
+FI
+dst := tmp
+	</operation>
+	<instruction name="BSR" form="r32, r32" xed="BSR_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_BitScanForward">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned __int32*" varname="index" etype="UI32" memwidth="32"/>
+	<parameter type="unsigned __int32" varname="a" etype="UI32"/>
+	<description>Set "index" to the index of the lowest set bit in 32-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1.</description>
+	<operation>
+tmp := 0
+IF a == 0
+	// MEM[index+31:index] is undefined
+	dst := 0
+ELSE
+	DO WHILE ((tmp &lt; 32) AND a[tmp] == 0)
+		tmp := tmp + 1
+	OD
+	MEM[index+31:index] := tmp
+	dst := 1
+FI
+	</operation>
+	<instruction name="BSF" form="r32, r32" xed="BSF_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_BitScanReverse">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned __int32*" varname="index" etype="UI32" memwidth="32"/>
+	<parameter type="unsigned __int32" varname="a" etype="UI32"/>
+	<description>Set "index" to the index of the highest set bit in 32-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1.</description>
+	<operation>
+tmp := 31
+IF a == 0
+	// MEM[index+31:index] is undefined
+	dst := 0
+ELSE
+	DO WHILE ((tmp &gt; 0) AND a[tmp] == 0)
+		tmp := tmp - 1
+	OD
+	MEM[index+31:index] := tmp
+	dst := 1
+FI
+	</operation>
+	<instruction name="BSR" form="r32, r32" xed="BSR_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_BitScanForward64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned __int32*" varname="index" etype="UI32" memwidth="32"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Set "index" to the index of the lowest set bit in 64-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1.</description>
+	<operation>
+tmp := 0
+IF a == 0
+	// MEM[index+31:index] is undefined
+	dst := 0
+ELSE
+	DO WHILE ((tmp &lt; 64) AND a[tmp] == 0)
+		tmp := tmp + 1
+	OD
+	MEM[index+31:index] := tmp
+	dst := 1
+FI
+	</operation>
+	<instruction name="BSF" form="r64, r64" xed="BSF_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_BitScanReverse64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned __int32*" varname="index" etype="UI32" memwidth="32"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Set "index" to the index of the highest set bit in 64-bit integer "a". If no bits are set in "a", then "index" is undefined and "dst" is set to 0, otherwise "dst" is set to 1.</description>
+	<operation>
+tmp := 63
+IF a == 0
+	// MEM[index+31:index] is undefined
+	dst := 0
+ELSE
+	DO WHILE ((tmp &gt; 0) AND a[tmp] == 0)
+		tmp := tmp - 1
+	OD
+	MEM[index+31:index] := tmp
+	dst := 1
+FI
+	</operation>
+	<instruction name="BSR" form="r64, r64" xed="BSR_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittest">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int32*" varname="a" etype="UI32" memwidth="32"/>
+	<parameter type="__int32" varname="b" etype="IMM" immwidth="5"/>
+	<description>Return the bit at index "b" of 32-bit integer "a".</description>
+	<operation>
+addr := a + ZeroExtend64(b)
+dst[0] := MEM[addr]
+	</operation>
+	<instruction name="BT" form="m32, r32" xed="BT_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittestandcomplement">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int32*" varname="a" etype="UI32" memwidth="32"/>
+	<parameter type="__int32" varname="b" etype="IMM" immwidth="5"/>
+	<description>Return the bit at index "b" of 32-bit integer "a", and set that bit to its complement.</description>
+	<operation>
+addr := a + ZeroExtend64(b)
+dst[0] := MEM[addr]
+MEM[addr] := ~dst[0]
+	</operation>
+	<instruction name="BTC" form="m32, r32" xed="BTC_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittestandreset">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int32*" varname="a" etype="UI32" memwidth="32"/>
+	<parameter type="__int32" varname="b" etype="IMM" immwidth="5"/>
+	<description>Return the bit at index "b" of 32-bit integer "a", and set that bit to zero.</description>
+	<operation>
+addr := a + ZeroExtend64(b)
+dst[0] := MEM[addr]
+MEM[addr] := 0
+	</operation>
+	<instruction name="BTR" form="m32, r32" xed="BTR_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittestandset">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int32*" varname="a" etype="UI32" memwidth="32"/>
+	<parameter type="__int32" varname="b" etype="IMM" immwidth="5"/>
+	<description>Return the bit at index "b" of 32-bit integer "a", and set that bit to one.</description>
+	<operation>
+addr := a + ZeroExtend64(b)
+dst[0] := MEM[addr]
+MEM[addr] := 1
+	</operation>
+	<instruction name="BTS" form="m32, r32" xed="BTS_MEMv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittest64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int64*" varname="a" etype="UI64" memwidth="32"/>
+	<parameter type="__int64" varname="b" etype="IMM" immwidth="6"/>
+	<description>Return the bit at index "b" of 64-bit integer "a".</description>
+	<operation>
+addr := a + b
+dst[0] := MEM[addr]
+	</operation>
+	<instruction name="BT" form="r64, r64" xed="BT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittestandcomplement64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int64*" varname="a" etype="UI64" memwidth="32"/>
+	<parameter type="__int64" varname="b" etype="IMM" immwidth="6"/>
+	<description>Return the bit at index "b" of 64-bit integer "a", and set that bit to its complement.</description>
+	<operation>
+addr := a + b
+dst[0] := MEM[addr]
+MEM[addr] := ~dst[0]
+	</operation>
+	<instruction name="BTC" form="r64, r64" xed="BTC_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittestandreset64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int64*" varname="a" etype="UI64" memwidth="32"/>
+	<parameter type="__int64" varname="b" etype="IMM" immwidth="6"/>
+	<description>Return the bit at index "b" of 64-bit integer "a", and set that bit to zero.</description>
+	<operation>
+addr := a + b
+dst[0] := MEM[addr]
+MEM[addr] := 0
+	</operation>
+	<instruction name="BTR" form="r64, r64" xed="BTR_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bittestandset64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Bit Manipulation</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="__int64*" varname="a" etype="UI64" memwidth="32"/>
+	<parameter type="__int64" varname="b" etype="IMM" immwidth="6"/>
+	<description>Return the bit at index "b" of 64-bit integer "a", and set that bit to one.</description>
+	<operation>
+addr := a + b
+dst[0] := MEM[addr]
+MEM[addr] := 1
+	</operation>
+	<instruction name="BTS" form="r64, r64" xed="BTS_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bswap">
+	<type>Integer</type>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Reverse the byte order of 32-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values.</description>
+	<operation>
+dst[7:0] := a[31:24]
+dst[15:8] := a[23:16]
+dst[23:16] := a[15:8]
+dst[31:24] := a[7:0]
+	</operation>
+	<instruction name="BSWAP" form="r32" xed="BSWAP_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_bswap64">
+	<type>Integer</type>
+	<category>Bit Manipulation</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Reverse the byte order of 64-bit integer "a", and store the result in "dst". This intrinsic is provided for conversion between little and big endian values.</description>
+	<operation>
+dst[7:0] := a[63:56]
+dst[15:8] := a[55:48]
+dst[23:16] := a[47:40]
+dst[31:24] := a[39:32]
+dst[39:32] := a[31:24]
+dst[47:40] := a[23:16]
+dst[55:48] := a[15:8]
+dst[63:56] := a[7:0]
+	</operation>
+	<instruction name="BSWAP" form="r64" xed="BSWAP_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_castf32_u32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<category>Cast</category>
+	<return type="unsigned __int32" varname="dst" etype="UI32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Cast from type float to type unsigned __int32 without conversion.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_castf64_u64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<category>Cast</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Cast from type double to type unsigned __int64 without conversion.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_castu32_f32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<category>Cast</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="unsigned __int32" varname="a" etype="UI32"/>
+	<description>Cast from type unsigned __int32 to type float without conversion.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_castu64_f64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<category>Cast</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Cast from type unsigned __int64 to type double without conversion.
+	This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_lrotl">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned long" varname="dst" etype="UI32"/>
+	<parameter type="unsigned long" varname="a" etype="UI32"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of unsigned long integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>// size := 32 or 64
+dst := a
+count := shift AND (size - 1)
+DO WHILE (count &gt; 0)
+	tmp[0] := dst[size - 1]
+	dst := (dst &lt;&lt; 1) OR tmp[0]
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROL" form="r64, imm8" xed="ROL_GPRv_IMMb"/>
+	<instruction name="ROL" form="r32, imm8" xed="ROL_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_lrotr">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned long" varname="dst" etype="UI32"/>
+	<parameter type="unsigned long" varname="a" etype="UI32"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="8"/>
+	<description>Shift the bits of unsigned long integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>// size := 32 or 64
+dst := a
+count := shift AND (size - 1)
+DO WHILE (count &gt; 0)
+	tmp[size - 1] := dst[0]
+	dst := (dst &gt;&gt; 1) OR tmp[size - 1]
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROR" form="r64, imm8" xed="ROR_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_allow_cpu_features">
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned __int64" varname="a" etype="IMM" immwidth="8"/>
+	<description>Treat the processor-specific feature(s) specified in "a" as available. Multiple features may be OR'd together. See the valid feature flags below:</description>
+	<operation>
+_FEATURE_GENERIC_IA32
+_FEATURE_FPU
+_FEATURE_CMOV
+_FEATURE_MMX
+_FEATURE_FXSAVE
+_FEATURE_SSE
+_FEATURE_SSE2
+_FEATURE_SSE3
+_FEATURE_SSSE3
+_FEATURE_SSE4_1
+_FEATURE_SSE4_2
+_FEATURE_MOVBE
+_FEATURE_POPCNT
+_FEATURE_PCLMULQDQ
+_FEATURE_AES
+_FEATURE_F16C
+_FEATURE_AVX
+_FEATURE_RDRND
+_FEATURE_FMA
+_FEATURE_BMI
+_FEATURE_LZCNT
+_FEATURE_HLE
+_FEATURE_RTM
+_FEATURE_AVX2
+_FEATURE_KNCNI
+_FEATURE_AVX512F
+_FEATURE_ADX
+_FEATURE_RDSEED
+_FEATURE_AVX512ER
+_FEATURE_AVX512PF
+_FEATURE_AVX512CD
+_FEATURE_SHA
+_FEATURE_MPX
+_FEATURE_AVX512BW
+_FEATURE_AVX512VL
+_FEATURE_AVX512VBMI
+_FEATURE_AVX512_4FMAPS
+_FEATURE_AVX512_4VNNIW
+_FEATURE_AVX512_VPOPCNTDQ
+_FEATURE_AVX512_BITALG
+_FEATURE_AVX512_VBMI2
+_FEATURE_GFNI
+_FEATURE_VAES
+_FEATURE_VPCLMULQDQ
+_FEATURE_AVX512_VNNI
+_FEATURE_CLWB
+_FEATURE_RDPID
+_FEATURE_IBT
+_FEATURE_SHSTK
+_FEATURE_SGX
+_FEATURE_WBNOINVD
+_FEATURE_PCONFIG
+_FEATURE_AXV512_4VNNIB
+_FEATURE_AXV512_4FMAPH
+_FEATURE_AXV512_BITALG2
+_FEATURE_AXV512_VP2INTERSECT
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_may_i_use_cpu_feature">
+	<category>General Support</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="unsigned __int64" varname="a" etype="IMM" immwidth="8"/>
+	<description>Dynamically query the processor to determine if the processor-specific feature(s) specified in "a" are available, and return true (1) if the entire set of features is available, or false (0) otherwise. Multiple features may be OR'd together. This intrinsic does not check the processor vendor. See the valid feature flags below:</description>
+	<operation>
+_FEATURE_GENERIC_IA32
+_FEATURE_FPU
+_FEATURE_CMOV
+_FEATURE_MMX
+_FEATURE_FXSAVE
+_FEATURE_SSE
+_FEATURE_SSE2
+_FEATURE_SSE3
+_FEATURE_SSSE3
+_FEATURE_SSE4_1
+_FEATURE_SSE4_2
+_FEATURE_MOVBE
+_FEATURE_POPCNT
+_FEATURE_PCLMULQDQ
+_FEATURE_AES
+_FEATURE_F16C
+_FEATURE_AVX
+_FEATURE_RDRND
+_FEATURE_FMA
+_FEATURE_BMI
+_FEATURE_LZCNT
+_FEATURE_HLE
+_FEATURE_RTM
+_FEATURE_AVX2
+_FEATURE_KNCNI
+_FEATURE_AVX512F
+_FEATURE_ADX
+_FEATURE_RDSEED
+_FEATURE_AVX512ER
+_FEATURE_AVX512PF
+_FEATURE_AVX512CD
+_FEATURE_SHA
+_FEATURE_MPX
+_FEATURE_AVX512BW
+_FEATURE_AVX512VL
+_FEATURE_AVX512VBMI
+_FEATURE_AVX512_4FMAPS
+_FEATURE_AVX512_4VNNIW
+_FEATURE_AVX512_VPOPCNTDQ
+_FEATURE_AVX512_BITALG
+_FEATURE_AVX512_VBMI2
+_FEATURE_GFNI
+_FEATURE_VAES
+_FEATURE_VPCLMULQDQ
+_FEATURE_AVX512_VNNI
+_FEATURE_CLWB
+_FEATURE_RDPID
+_FEATURE_IBT
+_FEATURE_SHSTK
+_FEATURE_SGX
+_FEATURE_WBNOINVD
+_FEATURE_PCONFIG
+_FEATURE_AXV512_4VNNIB
+_FEATURE_AXV512_4FMAPH
+_FEATURE_AXV512_BITALG2
+_FEATURE_AXV512_VP2INTERSECT
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdpmc">
+	<category>General Support</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Read the Performance Monitor Counter (PMC) specified by "a", and store up to 64-bits in "dst". The width of performance counters is implementation specific.</description>
+	<operation>dst[63:0] := ReadPMC(a)
+	</operation>
+	<instruction name="RDPMC" xed="RDPMC"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rotl">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="5"/>
+	<description>Shift the bits of unsigned 32-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>
+dst := a
+count := shift AND 31
+DO WHILE (count &gt; 0)
+	tmp[0] := dst[31]
+	dst := (dst &lt;&lt; 1) OR tmp[0]
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROL" form="r32, imm8" xed="ROL_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rotr">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="5"/>
+	<description>Shift the bits of unsigned 32-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>
+dst := a
+count := shift AND 31
+DO WHILE (count &gt; 0)
+	tmp[31] := dst[0]
+	dst := (dst &gt;&gt; 1) OR tmp
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROR" form="r32, imm8" xed="ROR_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rotwl">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned short" varname="dst" etype="UI16"/>
+	<parameter type="unsigned short" varname="a" etype="UI16"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="4"/>
+	<description>Shift the bits of unsigned 16-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>
+dst := a
+count := shift AND 15
+DO WHILE (count &gt; 0)
+	tmp[0] := dst[15]
+	dst := (dst &lt;&lt; 1) OR tmp[0]
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROL" form="r16, imm8" xed="ROL_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rotwr">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned short" varname="dst" etype="UI16"/>
+	<parameter type="unsigned short" varname="a" etype="UI16"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="4"/>
+	<description>Shift the bits of unsigned 16-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>
+dst := a
+count := shift AND 15
+DO WHILE (count &gt; 0)
+	tmp[15] := dst[0]
+	dst := (dst &gt;&gt; 1) OR tmp
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROR" form="r16, imm8" xed="ROR_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rotl64">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="6"/>
+	<description>Shift the bits of unsigned 64-bit integer "a" left by the number of bits specified in "shift", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>
+dst := a
+count := shift AND 63
+DO WHILE (count &gt; 0)
+	tmp[0] := dst[63]
+	dst := (dst &lt;&lt; 1) OR tmp[0]
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROL" form="r64, imm8" xed="ROL_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rotr64">
+	<type>Integer</type>
+	<category>Shift</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="int" varname="shift" etype="IMM" immwidth="6"/>
+	<description>Shift the bits of unsigned 64-bit integer "a" right by the number of bits specified in "shift", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in "dst".</description>
+	<operation>
+dst := a
+count := shift AND 63
+DO WHILE (count &gt; 0)
+	tmp[63] := dst[0]
+	dst := (dst &gt;&gt; 1) OR tmp[63]
+	count := count - 1
+OD
+	</operation>
+	<instruction name="ROR" form="r64, imm8" xed="ROR_GPRv_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_addcarry_u32">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Arithmetic</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned char" varname="c_in" etype="UI8"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<parameter type="unsigned int *" varname="out" etype="UI32" memwidth="32"/>
+	<description>Add unsigned 32-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag).</description>
+	<operation>
+tmp[32:0] := a[31:0] + b[31:0] + (c_in &gt; 0 ? 1 : 0)
+MEM[out+31:out] := tmp[31:0]
+dst[0] := tmp[32]
+dst[7:1] := 0
+	</operation>
+	<instruction name="ADC" form="r32, r32" xed="ADC_GPRv_GPRv_11"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_addcarry_u64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Arithmetic</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned char" varname="c_in" etype="UI8"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<parameter type="unsigned __int64 *" varname="out" etype="UI64" memwidth="64"/>
+	<description>Add unsigned 64-bit integers "a" and "b" with unsigned 8-bit carry-in "c_in" (carry flag), and store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag).</description>
+	<operation>
+tmp[64:0] := a[63:0] + b[63:0] + (c_in &gt; 0 ? 1 : 0)
+MEM[out+63:out] := tmp[63:0]
+dst[0] := tmp[64]
+dst[7:1] := 0
+	</operation>
+	<instruction name="ADC" form="r64, r64" xed="ADC_GPRv_GPRv_11"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_subborrow_u32">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Arithmetic</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned char" varname="c_in" etype="UI8"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned int" varname="b" etype="UI32"/>
+	<parameter type="unsigned int *" varname="out" etype="UI32" memwidth="32"/>
+	<description>Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 32-bit integer "b", and subtract the result from unsigned 32-bit integer "a". Store the unsigned 32-bit result in "out", and the carry-out in "dst" (carry or overflow flag).</description>
+	<operation>
+tmp[32:0] := a[31:0] - (b[31:0] + (c_in &gt; 0 ? 1 : 0))
+MEM[out+31:out] := tmp[31:0]
+dst[0] := tmp[32]
+dst[7:1] := 0
+	</operation>
+	<instruction name="SBB" form="r32, r32" xed="SBB_GPRv_GPRv_19"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_subborrow_u64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<category>Arithmetic</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned char" varname="c_in" etype="UI8"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="b" etype="UI64"/>
+	<parameter type="unsigned __int64 *" varname="out" etype="UI64" memwidth="64"/>
+	<description>Add unsigned 8-bit borrow "c_in" (carry flag) to unsigned 64-bit integer "b", and subtract the result from unsigned 64-bit integer "a". Store the unsigned 64-bit result in "out", and the carry-out in "dst" (carry or overflow flag).</description>
+	<operation>
+tmp[64:0] := a[63:0] - (b[63:0] + (c_in &gt; 0 ? 1 : 0))
+MEM[out+63:out] := tmp[63:0]
+dst[0] := tmp[64]
+dst[7:1] := 0
+	</operation>
+	<instruction name="SBB" form="r64, r64" xed="SBB_GPRv_GPRv_19"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_ptwrite32">
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Insert the 32-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled.</description>
+	<instruction name="PTWRITE" form="r32" xed="PTWRITE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_ptwrite64">
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Insert the 64-bit data from "a" into a Processor Trace stream via a PTW packet. The PTW packet will be inserted if tracing is currently enabled and ptwrite is currently enabled. The current IP will also be inserted via a FUP packet if FUPonPTW is enabled.</description>
+	<instruction name="PTWRITE" form="r64" xed="PTWRITE_GPRy"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_enclu_u32">
+	<category>Miscellaneous</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="const int" varname="a" etype="UI32"/>
+	<parameter type="size_t*" varname="__data" etype="UI64"/>
+	<description>Invoke the Intel SGX enclave user (non-privilege) leaf function specified by "a", and return the error code. The "__data" array contains 3 32-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx.</description>
+	<instruction name="ENCLU" xed="ENCLU"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_encls_u32">
+	<category>Miscellaneous</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="const int" varname="a" etype="UI32"/>
+	<parameter type="size_t*" varname="__data" etype="UI64"/>
+	<description>Invoke the Intel SGX enclave system (privileged) leaf function specified by "a", and return the error code. The "__data" array contains 3 32-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx.</description>
+	<instruction name="ENCLS" xed="ENCLS"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_enclv_u32">
+	<category>Miscellaneous</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="const int" varname="a" etype="UI32"/>
+	<parameter type="size_t*" varname="__data" etype="UI64"/>
+	<description>Invoke the Intel SGX enclave virtualized (VMM) leaf function specified by "a", and return the error code. The "__data" array contains 3 32-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to ebx, ecx, and edx.</description>
+	<instruction name="ENCLV" xed="ENCLV"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_wbinvd">
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Write back and flush internal caches.
+		Initiate writing-back and flushing of external
+		caches.</description>
+	<instruction name="WBINVD" xed="WBINVD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_cvtsh_ss">
+	<type>Floating Point</type>
+	<category>Convert</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="unsigned short" varname="a" etype="UI16"/>
+	<description>Convert the half-precision (16-bit) floating-point value "a" to a single-precision (32-bit) floating-point value, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP16_To_FP32(a[15:0])
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" sequence="TRUE" name="_cvtss_sh">
+	<type>Floating Point</type>
+	<category>Convert</category>
+	<return type="unsigned short" varname="dst" etype="UI16"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" hint="TRUE" immtype="_MM_FROUND"/>
+	<description>Convert the single-precision (32-bit) floating-point value "a" to a half-precision (16-bit) floating-point value, and store the result in "dst".
+	[round_note]</description>
+	<operation>
+dst[15:0] := Convert_FP32_To_FP16(a[31:0])
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" vexEq="TRUE" name="_mm_clmulepi64_si128">
+	<type>Integer</type>
+	<CPUID>PCLMULQDQ</CPUID>
+	<category>Application-Targeted</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst".</description>
+	<operation>
+IF (imm8[0] == 0)
+	TEMP1 := a[63:0]
+ELSE
+	TEMP1 := a[127:64]
+FI 
+IF (imm8[4] == 0)
+	TEMP2 := b[63:0]
+ELSE 
+	TEMP2 := b[127:64]
+FI
+FOR i := 0 to 63
+	TEMP[i] := (TEMP1[0] AND TEMP2[i])
+	FOR j := 1 to i
+		TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j])
+	ENDFOR 
+	dst[i] := TEMP[i]
+ENDFOR
+FOR i := 64 to 127
+	TEMP[i] := 0
+	FOR j := (i - 63) to 63
+		TEMP[i] := TEMP[i] XOR (TEMP1[j] AND TEMP2[i-j])
+	ENDFOR
+	dst[i] := TEMP[i]
+ENDFOR
+dst[127] := 0
+	</operation>
+	<instruction name="PCLMULQDQ" form="xmm, xmm, imm8" xed="PCLMULQDQ_XMMdq_XMMdq_IMMb"/>
+	<header>wmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_pconfig_u32">
+	<CPUID>PCONFIG</CPUID>
+	<category>Miscellaneous</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="const int" varname="a" etype="UI32"/>
+	<parameter type="size_t*" varname="__data" etype="UI64"/>
+	<description>Invoke the PCONFIG leaf function specified by "a". The "__data" array contains 3 32-bit elements that may act as input, output, or be unused, depending on the semantics of the specified leaf function; these correspond to rbx, rcx, and rdx. May return the value in eax, depending on the semantics of the specified leaf function.</description>
+	<instruction name="PCONFIG" xed="PCONFIG"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_popcnt_u32">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>POPCNT</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst".</description>
+	<operation>
+dst := 0
+FOR i := 0 to 31
+	IF a[i]
+		dst := dst + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="POPCNT" form="r32, r32" xed="POPCNT_GPRv_GPRv"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_popcnt_u64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>POPCNT</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="a" etype="UI64"/>
+	<description>Count the number of bits set to 1 in unsigned 64-bit integer "a", and return that count in "dst".</description>
+	<operation>
+dst := 0
+FOR i := 0 to 63
+	IF a[i]
+		dst := dst + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="POPCNT" form="r64, r64" xed="POPCNT_GPRv_GPRv"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_popcnt32">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>POPCNT</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Count the number of bits set to 1 in 32-bit integer "a", and return that count in "dst".</description>
+	<operation>
+dst := 0
+FOR i := 0 to 31
+	IF a[i]
+		dst := dst + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="POPCNT" form="r32, r32" xed="POPCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_popcnt64">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>POPCNT</CPUID>
+	<category>Bit Manipulation</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Count the number of bits set to 1 in 64-bit integer "a", and return that count in "dst".</description>
+	<operation>
+dst := 0
+FOR i := 0 to 63
+	IF a[i]
+		dst := dst + 1
+	FI
+ENDFOR
+	</operation>
+	<instruction name="POPCNT" form="r64, r64" xed="POPCNT_GPRv_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_prefetch">
+	<CPUID>PREFETCHWT1</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="char const*" varname="p" etype="UI8"/>
+	<parameter type="int" varname="i" etype="IMM" immwidth="2"/>
+	<description>Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i".</description>
+	<instruction name="PREFETCHWT1" form="m8" xed="PREFETCHWT1_MEMu8"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdpid_u32">
+	<CPUID>RDPID</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="void"/>
+	<description>Copy the IA32_TSC_AUX MSR (signature value) into "dst".</description>
+	<operation>dst[31:0] := IA32_TSC_AUX[31:0]
+	</operation>
+	<instruction name="RDPID" form="r32" xed="RDPID_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdrand16_step">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>RDRAND</CPUID>
+	<category>Random</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned short*" varname="val" etype="UI16" memwidth="16"/>
+	<description>Read a hardware generated 16-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise.</description>
+	<operation>IF HW_RND_GEN.ready == 1
+	val[15:0] := HW_RND_GEN.data
+	dst := 1
+ELSE
+	val[15:0] := 0
+	dst := 0
+FI
+	</operation>
+	<instruction name="RDRAND" form="r16" xed="RDRAND_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdrand32_step">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>RDRAND</CPUID>
+	<category>Random</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int*" varname="val" etype="UI32" memwidth="32"/>
+	<description>Read a hardware generated 32-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise.</description>
+	<operation>IF HW_RND_GEN.ready == 1
+	val[31:0] := HW_RND_GEN.data
+	dst := 1
+ELSE
+	val[31:0] := 0
+	dst := 0
+FI
+	</operation>
+	<instruction name="RDRAND" form="r32" xed="RDRAND_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdrand64_step">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>RDRAND</CPUID>
+	<category>Random</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned __int64*" varname="val" etype="UI64" memwidth="64"/>
+	<description>Read a hardware generated 64-bit random value and store the result in "val". Return 1 if a random value was generated, and 0 otherwise.</description>
+	<operation>IF HW_RND_GEN.ready == 1
+	val[63:0] := HW_RND_GEN.data
+	dst := 1
+ELSE
+	val[63:0] := 0
+	dst := 0
+FI
+	</operation>
+	<instruction name="RDRAND" form="r64" xed="RDRAND_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdseed16_step">
+	<type>Flag</type>
+	<CPUID>RDSEED</CPUID>
+	<category>Random</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned short *" varname="val" etype="UI16"/>
+	<description>Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise.</description>
+	<operation>IF HW_NRND_GEN.ready == 1
+	val[15:0] := HW_NRND_GEN.data
+	dst := 1
+ELSE
+	val[15:0] := 0
+	dst := 0
+FI
+	</operation>
+	<instruction name="RDSEED" form="r16" xed="RDSEED_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdseed32_step">
+	<type>Flag</type>
+	<CPUID>RDSEED</CPUID>
+	<category>Random</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int *" varname="val" etype="UI32"/>
+	<description>Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise.</description>
+	<operation>IF HW_NRND_GEN.ready == 1
+	val[31:0] := HW_NRND_GEN.data
+	dst := 1
+ELSE
+	val[31:0] := 0
+	dst := 0
+FI
+	</operation>
+	<instruction name="RDSEED" form="r32" xed="RDSEED_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdseed64_step">
+	<type>Flag</type>
+	<CPUID>RDSEED</CPUID>
+	<category>Random</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned __int64 *" varname="val" etype="UI64"/>
+	<description>Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store in "val". Return 1 if a random value was generated, and 0 otherwise.</description>
+	<operation>IF HW_NRND_GEN.ready == 1
+	val[63:0] := HW_NRND_GEN.data
+	dst := 1
+ELSE
+	val[63:0] := 0
+	dst := 0
+FI
+	</operation>
+	<instruction name="RDSEED" form="r64" xed="RDSEED_GPRv"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="__rdtscp">
+	<CPUID>RDTSCP</CPUID>
+	<category>General Support</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned int *" varname="mem_addr" etype="UI32" memwidth="32"/>
+	<description>Copy the current 64-bit value of the processor's time-stamp counter into "dst", and store the IA32_TSC_AUX MSR (signature value) into memory at "mem_addr".</description>
+	<operation>dst[63:0] := TimeStampCounter
+MEM[mem_addr+31:mem_addr] := IA32_TSC_AUX[31:0]
+	</operation>
+	<instruction name="RDTSCP" xed="RDTSCP"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xabort">
+	<CPUID>RTM</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="const unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Force an RTM abort. The EAX register is updated to reflect that an XABORT instruction caused the abort, and the "imm8" parameter will be provided in bits [31:24] of EAX.
+	Following an RTM abort, the logical processor resumes execution at the fallback address computed through the outermost XBEGIN instruction.</description>
+	<operation>IF RTM_ACTIVE == 0
+	// nop
+ELSE
+	// restore architectural register state
+	// discard memory updates performed in transaction
+	// update EAX with status and imm8 value
+	eax[31:24] := imm8[7:0]
+	RTM_NEST_COUNT := 0
+	RTM_ACTIVE := 0
+	IF _64_BIT_MODE
+		RIP := fallbackRIP
+	ELSE
+		EIP := fallbackEIP
+	FI
+FI
+	</operation>
+	<instruction name="XABORT" form="imm8" xed="XABORT_IMMb"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xbegin">
+	<CPUID>RTM</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="k" etype="UI32"/>
+	<parameter type="void"/>
+	<description>Specify the start of an RTM code region. 
+	If the logical processor was not already in transactional execution, then this call causes the logical processor to transition into transactional execution. 
+	On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution, restores architectural state, and starts execution beginning at the fallback address computed from the outermost XBEGIN instruction. Return status of ~0 (0xFFFFFFFF) if continuing inside transaction; all other codes are aborts.</description>
+	<operation>IF RTM_NEST_COUNT &lt; MAX_RTM_NEST_COUNT
+	RTM_NEST_COUNT := RTM_NEST_COUNT + 1
+	IF RTM_NEST_COUNT == 1
+		IF _64_BIT_MODE
+			fallbackRIP := RIP
+		ELSE IF _32_BIT_MODE
+			fallbackEIP := EIP
+		FI
+		
+		RTM_ACTIVE := 1
+		// enter RTM execution, record register state, start tracking memory state
+	FI
+ELSE
+	// RTM abort (see _xabort)
+FI
+	</operation>
+	<instruction name="XBEGIN" form="r32" xed="XBEGIN_RELBRz"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xend">
+	<CPUID>RTM</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Specify the end of an RTM code region.
+	If this corresponds to the outermost scope, the logical processor will attempt to commit the logical processor state atomically. 
+	If the commit fails, the logical processor will perform an RTM abort.</description>
+	<operation>IF RTM_ACTIVE == 1
+	RTM_NEST_COUNT := RTM_NEST_COUNT - 1
+	IF RTM_NEST_COUNT == 0
+		// try to commit transaction
+		IF FAIL_TO_COMMIT_TRANSACTION
+			// RTM abort (see _xabort)
+		ELSE
+			RTM_ACTIVE := 0
+		FI
+	FI
+FI
+	</operation>
+	<instruction name="XEND" xed="XEND"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xtest">
+	<CPUID>RTM</CPUID>
+	<category>General Support</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="void"/>
+	<description>Query the transactional execution status, return 1 if inside a transactionally executing RTM or HLE region, and return 0 otherwise.</description>
+	<operation>IF (RTM_ACTIVE == 1 OR HLE_ACTIVE == 1)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="XTEST" xed="XTEST"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_serialize">
+	<CPUID>SERIALIZE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<description>Serialize instruction execution, ensuring all modifications to flags, registers, and memory by previous instructions are completed before the next instruction is fetched.</description>
+	<instruction name="SERIALIZE" xed="SERIALIZE"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha1msg1_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst".</description>
+	<operation>
+W0 := a[127:96]
+W1 := a[95:64]
+W2 := a[63:32]
+W3 := a[31:0]
+W4 := b[127:96]
+W5 := b[95:64]
+dst[127:96] := W2 XOR W0
+dst[95:64] := W3 XOR W1
+dst[63:32] := W4 XOR W2
+dst[31:0] := W5 XOR W3
+	</operation>
+	<instruction name="SHA1MSG1" form="xmm, xmm" xed="SHA1MSG1_XMMi32_XMMi32_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha1msg2_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in "a" and the previous message values in "b", and store the result in "dst".</description>
+	<operation>
+W13 := b[95:64]
+W14 := b[63:32]
+W15 := b[31:0]
+W16 := (a[127:96] XOR W13) &lt;&lt;&lt; 1
+W17 := (a[95:64] XOR W14) &lt;&lt;&lt; 1
+W18 := (a[63:32] XOR W15) &lt;&lt;&lt; 1
+W19 := (a[31:0] XOR W16) &lt;&lt;&lt; 1
+dst[127:96] := W16
+dst[95:64] := W17
+dst[63:32] := W18
+dst[31:0] := W19
+	</operation>
+	<instruction name="SHA1MSG2" form="xmm, xmm" xed="SHA1MSG2_XMMi32_XMMi32_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha1nexte_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable "a", add that value to the scheduled values (unsigned 32-bit integers) in "b", and store the result in "dst".</description>
+	<operation>
+tmp := (a[127:96] &lt;&lt;&lt; 30)
+dst[127:96] := b[127:96] + tmp
+dst[95:64] := b[95:64]
+dst[63:32] := b[63:32]
+dst[31:0] := b[31:0]
+	</operation>
+	<instruction name="SHA1NEXTE" form="xmm, xmm" xed="SHA1NEXTE_XMMi32_XMMi32_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha1rnds4_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="const int" varname="func" etype="IMM" immwidth="2"/>
+	<description>Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) from "a" and some pre-computed sum of the next 4 round message values (unsigned 32-bit integers), and state variable E from "b", and store the updated SHA1 state (A,B,C,D) in "dst". "func" contains the logic functions and round constants.</description>
+	<operation>IF (func[1:0] == 0)
+	f := f0()
+	K := K0
+ELSE IF (func[1:0] == 1)
+	f := f1()
+	K := K1
+ELSE IF (func[1:0] == 2)
+	f := f2()
+	K := K2
+ELSE IF (func[1:0] == 3)
+	f := f3()
+	K := K3
+FI
+A := a[127:96]
+B := a[95:64]
+C := a[63:32]
+D := a[31:0]
+W[0] := b[127:96]
+W[1] := b[95:64]
+W[2] := b[63:32]
+W[3] := b[31:0]
+A[1] := f(B, C, D) + (A &lt;&lt;&lt; 5) + W[0] + K
+B[1] := A
+C[1] := B &lt;&lt;&lt; 30
+D[1] := C
+E[1] := D
+FOR i := 1 to 3
+	A[i+1] := f(B[i], C[i], D[i]) + (A[i] &lt;&lt;&lt; 5) + W[i] + E[i] + K
+	B[i+1] := A[i]
+	C[i+1] := B[i] &lt;&lt;&lt; 30
+	D[i+1] := C[i]
+	E[i+1] := D[i]
+ENDFOR
+dst[127:96] := A[4]
+dst[95:64] := B[4]
+dst[63:32] := C[4]
+dst[31:0] := D[4]
+	</operation>
+	<instruction name="SHA1RNDS4" form="xmm, xmm, imm8" xed="SHA1RNDS4_XMMi32_XMMi32_IMM8_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha256msg1_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst".</description>
+	<operation>W4 := b[31:0]
+W3 := a[127:96]
+W2 := a[95:64]
+W1 := a[63:32]
+W0 := a[31:0]
+dst[127:96] := W3 + sigma0(W4)
+dst[95:64] := W2 + sigma0(W3)
+dst[63:32] := W1 + sigma0(W2)
+dst[31:0] := W0 + sigma0(W1)
+	</operation>
+	<instruction name="SHA256MSG1" form="xmm, xmm" xed="SHA256MSG1_XMMi32_XMMi32_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha256msg2_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from "a" and "b", and store the result in "dst".</description>
+	<operation>W14 := b[95:64]
+W15 := b[127:96]
+W16 := a[31:0] + sigma1(W14)
+W17 := a[63:32] + sigma1(W15)
+W18 := a[95:64] + sigma1(W16)
+W19 := a[127:96] + sigma1(W17)
+dst[127:96] := W19
+dst[95:64] := W18
+dst[63:32] := W17
+dst[31:0] := W16
+	</operation>
+	<instruction name="SHA256MSG2" form="xmm, xmm" xed="SHA256MSG2_XMMi32_XMMi32_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm_sha256rnds2_epu32">
+	<type>Integer</type>
+	<CPUID>SHA</CPUID>
+	<category>Cryptography</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<parameter type="__m128i" varname="k" etype="UI32"/>
+	<description>Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from "a", an initial SHA256 state (A,B,E,F) from "b", and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from "k", and store the updated SHA256 state (A,B,E,F) in "dst".</description>
+	<operation>A[0] := b[127:96]
+B[0] := b[95:64]
+C[0] := a[127:96]
+D[0] := a[95:64]
+E[0] := b[63:32]
+F[0] := b[31:0]
+G[0] := a[63:32]
+H[0] := a[31:0]
+W_K[0] := k[31:0]
+W_K[1] := k[63:32]
+FOR i := 0 to 1
+	A[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + Maj(A[i], B[i], C[i]) + sum0(A[i])
+	B[i+1] := A[i]
+	C[i+1] := B[i]
+	D[i+1] := C[i]
+	E[i+1] := Ch(E[i], F[i], G[i]) + sum1(E[i]) + W_K[i] + H[i] + D[i]
+	F[i+1] := E[i]
+	G[i+1] := F[i]
+	H[i+1] := G[i]
+ENDFOR
+dst[127:96] := A[2]
+dst[95:64] := B[2]
+dst[63:32] := E[2]
+dst[31:0] := F[2]
+	</operation>
+	<instruction name="SHA256RNDS2" form="xmm, xmm" xed="SHA256RNDS2_XMMi32_XMMi32_SHA"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_MM_TRANSPOSE4_PS">
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="void"/>
+	<parameter type="__m128" varname="row0" etype="FP32"/>
+	<parameter type="__m128" varname="row1" etype="FP32"/>
+	<parameter type="__m128" varname="row2" etype="FP32"/>
+	<parameter type="__m128" varname="row3" etype="FP32"/>
+	<description>Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision (32-bit) floating-point elements in "row0", "row1", "row2", and "row3", and store the transposed matrix in these vectors ("row0" now contains column 0, etc.).</description>
+	<operation>
+__m128 tmp3, tmp2, tmp1, tmp0;
+tmp0 := _mm_unpacklo_ps(row0, row1);
+tmp2 := _mm_unpacklo_ps(row2, row3);
+tmp1 := _mm_unpackhi_ps(row0, row1);
+tmp3 := _mm_unpackhi_ps(row2, row3);
+row0 := _mm_movelh_ps(tmp0, tmp2);
+row1 := _mm_movehl_ps(tmp2, tmp0);
+row2 := _mm_movelh_ps(tmp1, tmp3);
+row3 := _mm_movehl_ps(tmp3, tmp1);
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_getcsr">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="void"/>
+	<description>Get the unsigned 32-bit value of the MXCSR control and status register.</description>
+	<operation>dst[31:0] := MXCSR
+	</operation>
+	<instruction name="STMXCSR" form="m32" xed="STMXCSR_MEMd"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_setcsr">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Set the MXCSR control and status register with the value in unsigned 32-bit integer "a".</description>
+	<operation>
+MXCSR := a[31:0]
+	</operation>
+	<instruction name="LDMXCSR" form="m32" xed="LDMXCSR_MEMd"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_GET_EXCEPTION_STATE">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<description>Macro: Get the exception state bits from the MXCSR control and status register. The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT</description>
+	<operation>dst[31:0] := MXCSR &amp; _MM_EXCEPT_MASK
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_SET_EXCEPTION_STATE">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Macro: Set the exception state bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT</description>
+	<operation>MXCSR := (MXCSR AND ~_MM_EXCEPT_MASK) OR a[31:0]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_GET_EXCEPTION_MASK">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<description>Macro: Get the exception mask bits from the MXCSR control and status register. The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT</description>
+	<operation>dst[31:0] := MXCSR &amp; _MM_MASK_MASK
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_SET_EXCEPTION_MASK">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Macro: Set the exception mask bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT</description>
+	<operation>MXCSR := (MXCSR AND ~_MM_MASK_MASK) OR a[31:0]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_GET_ROUNDING_MODE">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<description>Macro: Get the rounding mode bits from the MXCSR control and status register. The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO</description>
+	<operation>dst[31:0] := MXCSR &amp; _MM_ROUND_MASK
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_SET_ROUNDING_MODE">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Macro: Set the rounding mode bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO</description>
+	<operation>MXCSR := (MXCSR AND ~_MM_ROUND_MASK) OR a[31:0]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_GET_FLUSH_ZERO_MODE">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<description>Macro: Get the flush zero bits from the MXCSR control and status register. The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF</description>
+	<operation>dst[31:0] := MXCSR &amp; _MM_FLUSH_MASK
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_MM_SET_FLUSH_ZERO_MODE">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Macro: Set the flush zero bits of the MXCSR control and status register to the value in unsigned 32-bit integer "a". The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF</description>
+	<operation>MXCSR := (MXCSR AND ~_MM_FLUSH_MASK) OR a[31:0]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_prefetch">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="char const*" varname="p" etype="UI8"/>
+	<parameter type="int" varname="i" etype="IMM" immwidth="2"/>
+	<description>Fetch the line of data from memory that contains address "p" to a location in the cache hierarchy specified by the locality hint "i".</description>
+	<instruction name="PREFETCHNTA" form="m8" xed="PREFETCHNTA_MEMmprefetch"/>
+	<instruction name="PREFETCHT0" form="m8" xed="PREFETCHT0_MEMmprefetch"/>
+	<instruction name="PREFETCHT1" form="m8" xed="PREFETCHT1_MEMmprefetch"/>
+	<instruction name="PREFETCHT2" form="m8" xed="PREFETCHT2_MEMmprefetch"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_sfence">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Perform a serializing operation on all store-to-memory instructions that were issued prior to this instruction. Guarantees that every store instruction that precedes, in program order, is globally visible before any store instruction which follows the fence in program order.</description>
+	<instruction name="SFENCE" xed="SFENCE"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_max_pi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXSW" form="mm, mm" xed="PMAXSW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pmaxsw">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXSW" form="mm, mm" xed="PMAXSW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_max_pu8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXUB" form="mm, mm" xed="PMAXUB_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pmaxub">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXUB" form="mm, mm" xed="PMAXUB_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_min_pi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINSW" form="mm, mm" xed="PMINSW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pminsw">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINSW" form="mm, mm" xed="PMINSW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_min_pu8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINUB" form="mm, mm" xed="PMINUB_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pminub">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINUB" form="mm, mm" xed="PMINUB_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_mulhi_pu16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+	</operation>
+	<instruction name="PMULHUW" form="mm, mm" xed="PMULHUW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pmulhuw">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+	</operation>
+	<instruction name="PMULHUW" form="mm, mm" xed="PMULHUW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_avg_pu8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+ENDFOR
+	</operation>
+	<instruction name="PAVGB" form="mm, mm" xed="PAVGB_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pavgb">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+ENDFOR
+	</operation>
+	<instruction name="PAVGB" form="mm, mm" xed="PAVGB_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_avg_pu16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+ENDFOR
+	</operation>
+	<instruction name="PAVGW" form="mm, mm" xed="PAVGW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pavgw">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="__m64" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+ENDFOR
+	</operation>
+	<instruction name="PAVGW" form="mm, mm" xed="PAVGW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_sad_pu8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum the 8 differences to produce a single unsigned 16-bit integer, and store this unsigned 16-bit integer in the low 16 bits of "dst". The upper 48 bits of "dst" are set to zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
+ENDFOR
+dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56]
+dst[63:16] := 0
+	</operation>
+	<instruction name="PSADBW" form="mm, mm" xed="PSADBW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_psadbw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum the 8 differences to produce a single unsigned 16-bit integer, and store this unsigned 16-bit integer in the low 16 bits of "dst". The upper 48 bits of "dst" are set to zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
+ENDFOR
+dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56]
+dst[63:16] := 0
+	</operation>
+	<instruction name="PSADBW" form="mm, mm" xed="PSADBW_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cvtsi32_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<description>Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CVTSI2SS" form="xmm, r32" xed="CVTSI2SS_XMMss_GPR32d"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cvt_si2ss">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<description>Convert the signed 32-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CVTSI2SS" form="xmm, r32" xed="CVTSI2SS_XMMss_GPR32d"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvtsi64_ss">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<description>Convert the signed 64-bit integer "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="CVTSI2SS" form="xmm, r64" xed="CVTSI2SS_XMMss_GPR64q"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvtpi32_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Convert packed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[63:32] := Convert_Int32_To_FP32(b[63:32])
+dst[95:64] := a[95:64]
+dst[127:96] := a[127:96]
+	</operation>
+	<instruction name="CVTPI2PS" form="xmm, mm" xed="CVTPI2PS_XMMq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvt_pi2ps">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", and copy the upper 2 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+dst[63:32] := Convert_Int32_To_FP32(b[63:32])
+dst[95:64] := a[95:64]
+dst[127:96] := a[127:96]
+	</operation>
+	<instruction name="CVTPI2PS" form="xmm, mm" xed="CVTPI2PS_XMMq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtpi16_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<description>Convert packed 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	m := j*32
+	dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i])
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtpu16_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<description>Convert packed unsigned 16-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	m := j*32
+	dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i])
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtpi8_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI8"/>
+	<description>Convert the lower packed 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*8
+	m := j*32
+	dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i])
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtpu8_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<description>Convert the lower packed unsigned 8-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*8
+	m := j*32
+	dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i])
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtpi32x2_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of "dst", then convert the packed signed 32-bit integers in "b" to packed single-precision (32-bit) floating-point elements, and store the results in the upper 2 elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_Int32_To_FP32(a[31:0])
+dst[63:32] := Convert_Int32_To_FP32(a[63:32])
+dst[95:64] := Convert_Int32_To_FP32(b[31:0])
+dst[127:96] := Convert_Int32_To_FP32(b[63:32])
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_stream_pi">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m64*" varname="mem_addr" etype="FP32" memwidth="64"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<description>Store 64-bits of integer data from "a" into memory using a non-temporal memory hint.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="MOVNTQ" form="m64, mm" xed="MOVNTQ_MEMq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_maskmove_si64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="mask" etype="UI8"/>
+	<parameter type="char*" varname="mem_addr" etype="UI8" memwidth="64"/>
+	<description>Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	IF mask[i+7]
+		MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="MASKMOVQ" form="mm, mm" xed="MASKMOVQ_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_maskmovq">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="mask" etype="UI8"/>
+	<parameter type="char*" varname="mem_addr" etype="UI8" memwidth="64"/>
+	<description>Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	IF mask[i+7]
+		MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="MASKMOVQ" form="mm, mm" xed="MASKMOVQ_MMXq_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_extract_pi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst".</description>
+	<operation>
+dst[15:0] := (a[63:0] &gt;&gt; (imm8[1:0] * 16))[15:0]
+dst[31:16] := 0
+	</operation>
+	<instruction name="PEXTRW" form="r32, mm, imm8" xed="PEXTRW_GPR32_MMXq_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pextrw">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst".</description>
+	<operation>
+dst[15:0] := (a[63:0] &gt;&gt; (imm8[1:0] * 16))[15:0]
+dst[31:16] := 0
+	</operation>
+	<instruction name="PEXTRW" form="r32, mm, imm8" xed="PEXTRW_GPR32_MMXq_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_insert_pi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="i" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[63:0] := a[63:0]
+sel := imm8[1:0]*16
+dst[sel+15:sel] := i[15:0]
+	</operation>
+	<instruction name="PINSRW" form="mm, r32, imm8" xed="PINSRW_MMXq_GPR32_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pinsrw">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="i" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[63:0] := a[63:0]
+sel := imm8[1:0]*16
+dst[sel+15:sel] := i[15:0]
+	</operation>
+	<instruction name="PINSRW" form="mm, r32, imm8" xed="PINSRW_MMXq_GPR32_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_movemask_pi8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<description>Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[j] := a[i+7]
+ENDFOR
+dst[MAX:8] := 0
+	</operation>
+	<instruction name="PMOVMSKB" form="r32, mm" xed="PMOVMSKB_GPR32_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pmovmskb">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<description>Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[j] := a[i+7]
+ENDFOR
+dst[MAX:8] := 0
+	</operation>
+	<instruction name="PMOVMSKB" form="r32, mm" xed="PMOVMSKB_GPR32_MMXq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_shuffle_pi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[15:0] := src[15:0]
+	1:	tmp[15:0] := src[31:16]
+	2:	tmp[15:0] := src[47:32]
+	3:	tmp[15:0] := src[63:48]
+	ESAC
+	RETURN tmp[15:0]
+}
+dst[15:0] := SELECT4(a[63:0], imm8[1:0])
+dst[31:16] := SELECT4(a[63:0], imm8[3:2])
+dst[47:32] := SELECT4(a[63:0], imm8[5:4])
+dst[63:48] := SELECT4(a[63:0], imm8[7:6])
+	</operation>
+	<instruction name="PSHUFW" form="mm, mm, imm8" xed="PSHUFW_MMXq_MMXq_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_m_pshufw">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in "a" using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[15:0] := src[15:0]
+	1:	tmp[15:0] := src[31:16]
+	2:	tmp[15:0] := src[47:32]
+	3:	tmp[15:0] := src[63:48]
+	ESAC
+	RETURN tmp[15:0]
+}
+dst[15:0] := SELECT4(a[63:0], imm8[1:0])
+dst[31:16] := SELECT4(a[63:0], imm8[3:2])
+dst[47:32] := SELECT4(a[63:0], imm8[5:4])
+dst[63:48] := SELECT4(a[63:0], imm8[7:6])
+	</operation>
+	<instruction name="PSHUFW" form="mm, mm, imm8" xed="PSHUFW_MMXq_MMXq_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_add_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Add the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] + b[31:0]
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="ADDSS" form="xmm, xmm" xed="ADDSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_add_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Add packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="ADDPS" form="xmm, xmm" xed="ADDPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_sub_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Subtract the lower single-precision (32-bit) floating-point element in "b" from the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] - b[31:0]
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="SUBSS" form="xmm, xmm" xed="SUBSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_sub_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Subtract packed single-precision (32-bit) floating-point elements in "b" from packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="SUBPS" form="xmm, xmm" xed="SUBPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_mul_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Multiply the lower single-precision (32-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] * b[31:0]
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="MULSS" form="xmm, xmm" xed="MULSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_mul_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Multiply packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="MULPS" form="xmm, xmm" xed="MULPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_div_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Divide the lower single-precision (32-bit) floating-point element in "a" by the lower single-precision (32-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] / b[31:0]
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="DIVSS" form="xmm, xmm" xed="DIVSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_div_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Divide packed single-precision (32-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := a[i+31:i] / b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="DIVPS" form="xmm, xmm" xed="DIVPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_sqrt_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := SQRT(a[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="SQRTSS" form="xmm, xmm" xed="SQRTSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="SQRTPS" form="xmm, xmm" xed="SQRTPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_rcp_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12.</description>
+	<operation>
+dst[31:0] := (1.0 / a[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="RCPSS" form="xmm, xmm" xed="RCPSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_rcp_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (1.0 / a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="RCPPS" form="xmm, xmm" xed="RCPPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_rsqrt_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". The maximum relative error for this approximation is less than 1.5*2^-12.</description>
+	<operation>
+dst[31:0] := (1.0 / SQRT(a[31:0]))
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="RSQRTSS" form="xmm, xmm" xed="RSQRTSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_rsqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". The maximum relative error for this approximation is less than 1.5*2^-12.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (1.0 / SQRT(a[i+31:i]))
+ENDFOR
+	</operation>
+	<instruction name="RSQRTPS" form="xmm, xmm" xed="RSQRTPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_min_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := MIN(a[31:0], b[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="MINSS" form="xmm, xmm" xed="MINSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_min_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="MINPS" form="xmm, xmm" xed="MINPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_max_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := MAX(a[31:0], b[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="MAXSS" form="xmm, xmm" xed="MAXSS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_max_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="MAXPS" form="xmm, xmm" xed="MAXPS_XMMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_and_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (a[i+31:i] AND b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="ANDPS" form="xmm, xmm" xed="ANDPS_XMMxud_XMMxud"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_andnot_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ((NOT a[i+31:i]) AND b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="ANDNPS" form="xmm, xmm" xed="ANDNPS_XMMxud_XMMxud"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_or_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] OR b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="ORPS" form="xmm, xmm" xed="ORPS_XMMxud_XMMxud"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_xor_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Logical</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] XOR b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="XORPS" form="xmm, xmm" xed="XORPS_XMMxud_XMMxud"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpeq_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := ( a[31:0] == b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpeq_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmplt_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := ( a[31:0] &lt; b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmplt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &lt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmple_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := ( a[31:0] &lt;= b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmple_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &lt;= b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpgt_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := ( a[31:0] &gt; b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpgt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &gt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpge_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := ( a[31:0] &gt;= b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpge_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &gt;= b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpneq_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := ( a[31:0] != b[31:0] ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpneq_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] != b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpnlt_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := (!( a[31:0] &lt; b[31:0] )) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpnlt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (!( a[i+31:i] &lt; b[i+31:i] )) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpnle_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := (!( a[31:0] &lt;= b[31:0] )) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpnle_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (!( a[i+31:i] &lt;= b[i+31:i] )) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpngt_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := (!( a[31:0] &gt; b[31:0] )) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpngt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (!( a[i+31:i] &gt; b[i+31:i] )) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpnge_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := (!( a[31:0] &gt;= b[31:0] )) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpnge_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := (!( a[i+31:i] &gt;= b[i+31:i] )) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpord_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>dst[31:0] := ( a[31:0] != NaN AND b[31:0] != NaN ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpord_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] != NaN AND b[i+31:i] != NaN ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpunord_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>dst[31:0] := ( a[31:0] == NaN OR b[31:0] == NaN ) ? 0xFFFFFFFF : 0
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="CMPSS" form="xmm, xmm, imm8" xed="CMPSS_XMMss_XMMss_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cmpunord_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare packed single-precision (32-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] == NaN OR b[i+31:i] == NaN ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPS" form="xmm, xmm, imm8" xed="CMPPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_comieq_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[31:0] == b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISS" form="xmm, xmm" xed="COMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_comilt_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[31:0] &lt; b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISS" form="xmm, xmm" xed="COMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_comile_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[31:0] &lt;= b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISS" form="xmm, xmm" xed="COMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_comigt_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[31:0] &gt; b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISS" form="xmm, xmm" xed="COMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_comige_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[31:0] &gt;= b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISS" form="xmm, xmm" xed="COMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_comineq_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[31:0] != b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISS" form="xmm, xmm" xed="COMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_ucomieq_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[31:0] == b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISS" form="xmm, xmm" xed="UCOMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_ucomilt_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[31:0] &lt; b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISS" form="xmm, xmm" xed="UCOMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_ucomile_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[31:0] &lt;= b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISS" form="xmm, xmm" xed="UCOMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_ucomigt_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[31:0] &gt; b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISS" form="xmm, xmm" xed="UCOMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_ucomige_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[31:0] &gt;= b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISS" form="xmm, xmm" xed="UCOMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_ucomineq_ss">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compare the lower single-precision (32-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[31:0] != b[31:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISS" form="xmm, xmm" xed="UCOMISS_XMMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cvtss_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32(a[31:0])
+	</operation>
+	<instruction name="CVTSS2SI" form="r32, xmm" xed="CVTSS2SI_GPR32d_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cvt_ss2si">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32(a[31:0])
+	</operation>
+	<instruction name="CVTSS2SI" form="r32, xmm" xed="CVTSS2SI_GPR32d_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvtss_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64(a[31:0])
+	</operation>
+	<instruction name="CVTSS2SI" form="r64, xmm" xed="CVTSS2SI_GPR64q_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvtss_f32">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="float" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Copy the lower single-precision (32-bit) floating-point element of "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="MOVSS" form="m32, xmm" xed="MOVSS_MEMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvtps_pi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTPS2PI" form="mm, xmm" xed="CVTPS2PI_MMXq_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvt_ps2pi">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTPS2PI" form="mm, xmm" xed="CVTPS2PI_MMXq_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cvttss_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0])
+	</operation>
+	<instruction name="CVTTSS2SI" form="r32, xmm" xed="CVTTSS2SI_GPR32d_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_cvtt_ss2si">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0])
+	</operation>
+	<instruction name="CVTTSS2SI" form="r32, xmm" xed="CVTTSS2SI_GPR32d_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvttss_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0])
+	</operation>
+	<instruction name="CVTTSS2SI" form="r64, xmm" xed="CVTTSS2SI_GPR64q_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvttps_pi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTTPS2PI" form="mm, xmm" xed="CVTTPS2PI_MMXq_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_cvtt_ps2pi">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTTPS2PI" form="mm, xmm" xed="CVTTPS2PI_MMXq_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtps_pi16">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 16-bit integers, and store the results in "dst". Note: this intrinsic will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and 0x7FFFFFFF.</description>
+	<operation>
+FOR j := 0 to 3
+	i := 16*j
+	k := 32*j
+	IF a[k+31:k] &gt;= FP32(0x7FFF) &amp;&amp; a[k+31:k] &lt;= FP32(0x7FFFFFFF)
+		dst[i+15:i] := 0x7FFF
+	ELSE
+		dst[i+15:i] := Convert_FP32_To_Int16(a[k+31:k])
+	FI
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_cvtps_pi8">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="SI8"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 8-bit integers, and store the results in the lower 4 elements of "dst". Note: this intrinsic will generate 0x7F, rather than 0x80, for input values between 0x7F and 0x7FFFFFFF.</description>
+	<operation>
+FOR j := 0 to 3
+	i := 8*j
+	k := 32*j
+	IF a[k+31:k] &gt;= FP32(0x7F) &amp;&amp; a[k+31:k] &lt;= FP32(0x7FFFFFFF)
+		dst[i+7:i] := 0x7F
+	ELSE
+		dst[i+7:i] := Convert_FP32_To_Int8(a[k+31:k])
+	FI
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_set_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Set</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Copy single-precision (32-bit) floating-point element "a" to the lower element of "dst", and zero the upper 3 elements.</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[127:32] := 0
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_set1_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Set</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_set_ps1">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Set</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="a" etype="FP32"/>
+	<description>Broadcast single-precision (32-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_set_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Set</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="e3" etype="FP32"/>
+	<parameter type="float" varname="e2" etype="FP32"/>
+	<parameter type="float" varname="e1" etype="FP32"/>
+	<parameter type="float" varname="e0" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+dst[95:64] := e2
+dst[127:96] := e3
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_setr_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Set</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float" varname="e3" etype="FP32"/>
+	<parameter type="float" varname="e2" etype="FP32"/>
+	<parameter type="float" varname="e1" etype="FP32"/>
+	<parameter type="float" varname="e0" etype="FP32"/>
+	<description>Set packed single-precision (32-bit) floating-point elements in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e3
+dst[63:32] := e2
+dst[95:64] := e1
+dst[127:96] := e0
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_setzero_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Set</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m128 with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="XORPS" form="xmm, xmm" xed="XORPS_XMMxud_XMMxud"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_loadh_pi">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m64 const*" varname="mem_addr" etype="FP32" memwidth="64"/>
+	<description>Load 2 single-precision (32-bit) floating-point elements from memory into the upper 2 elements of "dst", and copy the lower 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[63:32] := a[63:32]
+dst[95:64] := MEM[mem_addr+31:mem_addr]
+dst[127:96] := MEM[mem_addr+63:mem_addr+32]
+	</operation>
+	<instruction name="MOVHPS" form="xmm, m64" xed="MOVHPS_XMMq_MEMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_loadl_pi">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m64 const*" varname="mem_addr" etype="FP32" memwidth="64"/>
+	<description>Load 2 single-precision (32-bit) floating-point elements from memory into the lower 2 elements of "dst", and copy the upper 2 elements from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[31:0] := MEM[mem_addr+31:mem_addr]
+dst[63:32] := MEM[mem_addr+63:mem_addr+32]
+dst[95:64] := a[95:64]
+dst[127:96] := a[127:96]
+	</operation>
+	<instruction name="MOVLPS" form="xmm, m64" xed="MOVLPS_XMMq_MEMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_load_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Load a single-precision (32-bit) floating-point element from memory into the lower element of "dst", and zero the upper 3 elements. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[31:0] := MEM[mem_addr+31:mem_addr]
+dst[127:32] := 0
+	</operation>
+	<instruction name="MOVSS" form="xmm, m32" xed="MOVSS_XMMdq_MEMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_load1_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Load a single-precision (32-bit) floating-point element from memory into all elements of "dst".</description>
+	<operation>
+dst[31:0] := MEM[mem_addr+31:mem_addr]
+dst[63:32] := MEM[mem_addr+31:mem_addr]
+dst[95:64] := MEM[mem_addr+31:mem_addr]
+dst[127:96] := MEM[mem_addr+31:mem_addr]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_load_ps1">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<description>Load a single-precision (32-bit) floating-point element from memory into all elements of "dst".</description>
+	<operation>
+dst[31:0] := MEM[mem_addr+31:mem_addr]
+dst[63:32] := MEM[mem_addr+31:mem_addr]
+dst[95:64] := MEM[mem_addr+31:mem_addr]
+dst[127:96] := MEM[mem_addr+31:mem_addr]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_load_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVAPS" form="xmm, m128" xed="MOVAPS_XMMps_MEMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_loadu_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVUPS" form="xmm, m128" xed="MOVUPS_XMMps_MEMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_loadr_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="float const*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<description>Load 4 single-precision (32-bit) floating-point elements from memory into "dst" in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[31:0] := MEM[mem_addr+127:mem_addr+96]
+dst[63:32] := MEM[mem_addr+95:mem_addr+64]
+dst[95:64] := MEM[mem_addr+63:mem_addr+32]
+dst[127:96] := MEM[mem_addr+31:mem_addr]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_stream_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVNTPS" form="m128, xmm" xed="MOVNTPS_MEMdq_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_storeh_pi">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m64*" varname="mem_addr" etype="FP32" memwidth="64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store the upper 2 single-precision (32-bit) floating-point elements from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[95:64]
+MEM[mem_addr+63:mem_addr+32] := a[127:96]
+	</operation>
+	<instruction name="MOVHPS" form="m64, xmm" xed="MOVHPS_MEMq_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_storel_pi">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m64*" varname="mem_addr" etype="FP32" memwidth="64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store the lower 2 single-precision (32-bit) floating-point elements from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+MEM[mem_addr+63:mem_addr+32] := a[63:32]
+	</operation>
+	<instruction name="MOVLPS" form="m64, xmm" xed="MOVLPS_MEMq_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_store_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store the lower single-precision (32-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+	</operation>
+	<instruction name="MOVSS" form="m32, xmm" xed="MOVSS_MEMss_XMMss"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_store1_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+MEM[mem_addr+63:mem_addr+32] := a[31:0]
+MEM[mem_addr+95:mem_addr+64] := a[31:0]
+MEM[mem_addr+127:mem_addr+96] := a[31:0]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_store_ps1">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store the lower single-precision (32-bit) floating-point element from "a" into 4 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+MEM[mem_addr+63:mem_addr+32] := a[31:0]
+MEM[mem_addr+95:mem_addr+64] := a[31:0]
+MEM[mem_addr+127:mem_addr+96] := a[31:0]
+	</operation>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_store_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVAPS" form="m128, xmm" xed="MOVAPS_MEMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_storeu_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVUPS" form="m128, xmm" xed="MOVUPS_MEMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_storer_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="float*" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Store 4 single-precision (32-bit) floating-point elements from "a" into memory in reverse order.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[127:96]
+MEM[mem_addr+63:mem_addr+32] := a[95:64]
+MEM[mem_addr+95:mem_addr+64] := a[63:32]
+MEM[mem_addr+127:mem_addr+96] := a[31:0]
+	</operation>
+	<instruction name="MOVUPS" form="m128, xmm" xed="MOVUPS_MEMps_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_move_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Move the lower single-precision (32-bit) floating-point element from "b" to the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := b[31:0]
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="MOVSS" form="xmm, xmm" xed="MOVSS_XMMss_XMMss_0F10"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_shuffle_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="unsigned int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(b[127:0], imm8[5:4])
+dst[127:96] := SELECT4(b[127:0], imm8[7:6])
+	</operation>
+	<instruction name="SHUFPS" form="xmm, xmm, imm8" xed="SHUFPS_XMMps_XMMps_IMMb"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_unpackhi_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="UNPCKHPS" form="xmm, xmm" xed="UNPCKHPS_XMMps_XMMdq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_unpacklo_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Unpack and interleave single-precision (32-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="UNPCKLPS" form="xmm, xmm" xed="UNPCKLPS_XMMps_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_movehl_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Move the upper 2 single-precision (32-bit) floating-point elements from "b" to the lower 2 elements of "dst", and copy the upper 2 elements from "a" to the upper 2 elements of "dst".</description>
+	<operation>
+dst[31:0] := b[95:64]
+dst[63:32] := b[127:96]
+dst[95:64] := a[95:64]
+dst[127:96] := a[127:96]
+	</operation>
+	<instruction name="MOVHLPS" form="xmm, xmm" xed="MOVHLPS_XMMq_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_movelh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Move the lower 2 single-precision (32-bit) floating-point elements from "b" to the upper 2 elements of "dst", and copy the lower 2 elements from "a" to the lower 2 elements of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[63:32] := a[63:32]
+dst[95:64] := b[31:0]
+dst[127:96] := b[63:32]
+	</operation>
+	<instruction name="MOVLHPS" form="xmm, xmm" xed="MOVLHPS_XMMq_XMMq"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" vexEq="TRUE" name="_mm_movemask_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Set each bit of mask "dst" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in "a".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF a[i+31]
+		dst[j] := 1
+	ELSE
+		dst[j] := 0
+	FI
+ENDFOR
+dst[MAX:4] := 0
+	</operation>
+	<instruction name="MOVMSKPS" form="r32, xmm" xed="MOVMSKPS_GPR32_XMMps"/>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_malloc">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void*"/>
+	<parameter type="size_t" varname="size" etype="UI64"/>
+	<parameter type="size_t" varname="align" etype="UI64"/>
+	<description>Allocate "size" bytes of memory, aligned to the alignment specified in "align", and return a pointer to the allocated memory. "_mm_free" should be used to free memory that is allocated with "_mm_malloc".</description>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_free">
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<description>Free aligned memory that was allocated with "_mm_malloc".</description>
+	<header>xmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_undefined_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>General Support</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m128 with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_acos_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ACOS(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_acos_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ACOS(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_acosh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ACOSH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_acosh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ACOSH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_asin_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ASIN(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_asin_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ASIN(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_asinh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ASINH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_asinh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ASINH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_atan_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ATAN(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_atan_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ATAN(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_atan2_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ATAN2(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_atan2_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in "a" divided by packed elements in "b", and store the results in "dst" expressed in radians.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ATAN2(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_atanh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ATANH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_atanh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ATANH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cbrt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CubeRoot(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cbrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := CubeRoot(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cdfnorm_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CDFNormal(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cdfnorm_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := CDFNormal(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cdfnorminv_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cumulative distribution function of packed double-precision (64-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := InverseCDFNormal(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cdfnorminv_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in "a" using the normal distribution, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := InverseCDFNormal(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cexp_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]".</description>
+	<operation>
+DEFINE CEXP(a[31:0], b[31:0]) {
+	result[31:0]  := POW(FP32(e), a[31:0]) * COS(b[31:0])
+	result[63:32] := POW(FP32(e), a[31:0]) * SIN(b[31:0])
+	RETURN result
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CEXP(a[i+31:i], a[i+63:i+32])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_clog_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]".</description>
+	<operation>
+DEFINE CLOG(a[31:0], b[31:0]) {
+	result[31:0]  := LOG(SQRT(POW(a, 2.0) + POW(b, 2.0)))
+	result[63:32] := ATAN2(b, a)
+	RETURN result
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CLOG(a[i+31:i], a[i+63:i+32])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cos_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := COS(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cos_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := COS(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cosd_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := COSD(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cosd_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := COSD(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cosh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := COSH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_cosh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := COSH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_csqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed complex numbers in "a", and store the complex results in "dst". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number "complex = vec.fp32[0] + i * vec.fp32[1]".</description>
+	<operation>
+DEFINE CSQRT(a[31:0], b[31:0]) {
+	sign[31:0] := (b &lt; 0.0) ? -FP32(1.0) : FP32(1.0)
+	result[31:0]  := SQRT((a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0)
+	result[63:32] := sign * SQRT((-a + SQRT(POW(a, 2.0) + POW(b, 2.0))) / 2.0)
+	RETURN result
+}
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CSQRT(a[i+31:i], a[i+63:i+32])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epi8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Divide packed signed 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 8*j
+	IF b[i+7:i] == 0
+		#DE
+	FI
+	dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Divide packed signed 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	IF b[i+15:i] == 0
+		#DE
+	FI
+	dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Divide packed signed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF b[i+31:i] == 0
+		#DE
+	FI
+	dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epi64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Divide packed signed 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	IF b[i+63:i] == 0
+		#DE
+	FI
+	dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epu8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := 8*j
+	IF b[i+7:i] == 0
+		#DE
+	FI
+	dst[i+7:i] := Truncate8(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epu16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := 16*j
+	IF b[i+15:i] == 0
+		#DE
+	FI
+	dst[i+15:i] := Truncate16(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epu32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	IF b[i+31:i] == 0
+		#DE
+	FI
+	dst[i+31:i] := Truncate32(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_div_epu64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	IF b[i+63:i] == 0
+		#DE
+	FI
+	dst[i+63:i] := Truncate64(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erf_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ERF(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erf_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ERF(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erfc_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := 1.0 - ERF(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erfc_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := 1.0 - ERF(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erfcinv_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse complementary error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := 1.0 / (1.0 - ERF(a[i+63:i]))
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erfcinv_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := 1.0 / (1.0 - ERF(a[i+31:i]))
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erfinv_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse error function of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := 1.0 / ERF(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_erfinv_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse error function of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := 1.0 / ERF(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_exp_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := POW(e, a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_exp_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := POW(FP32(e), a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_exp10_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := POW(10.0, a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_exp10_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := POW(FP32(10.0), a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_exp2_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := POW(2.0, a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_exp2_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := POW(FP32(2.0), a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_expm1_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the exponential value of "e" raised to the power of packed double-precision (64-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := POW(e, a[i+63:i]) - 1.0
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_expm1_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the exponential value of "e" raised to the power of packed single-precision (32-bit) floating-point elements in "a", subtract one from each element, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := POW(FP32(e), a[i+31:i]) - 1.0
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_hypot_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SQRT(POW(a[i+63:i], 2.0) + POW(b[i+63:i], 2.0))
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_hypot_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SQRT(POW(a[i+31:i], 2.0) + POW(b[i+31:i], 2.0))
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_idiv_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_idivrem_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i *" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed 32-bit integers into memory at "mem_addr".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_invcbrt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := InvCubeRoot(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_invcbrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := InvCubeRoot(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_invsqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the inverse square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := InvSQRT(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_invsqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the inverse square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := InvSQRT(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_irem_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log10_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i]) / LOG(10.0)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log10_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(10.0)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log1p_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := LOG(1.0 + a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log1p_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := LOG(1.0 + a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log2_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := LOG(a[i+63:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_log2_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := LOG(a[i+31:i]) / LOG(2.0)
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_logb_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the exponent of each packed double-precision (64-bit) floating-point element in "a" to a double-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ConvertExpFP64(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_logb_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert the exponent of each packed single-precision (32-bit) floating-point element in "a" to a single-precision floating-point number representing the integer exponent, and store the results in "dst". This intrinsic essentially calculates "floor(log2(x))" for each element.</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ConvertExpFP32(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_pow_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the exponential value of packed double-precision (64-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := POW(a[i+63:i], b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_pow_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Compute the exponential value of packed single-precision (32-bit) floating-point elements in "a" raised by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := POW(a[i+31:i], b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epi8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Divide packed 8-bit integers in "a" by packed elements in "b", and store the remainders as packed 8-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := 8*j
+	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epi16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Divide packed 16-bit integers in "a" by packed elements in "b", and store the remainders as packed 16-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 16*j
+	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed 32-bit integers in "a" by packed elements in "b", and store the remainders as packed 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epi64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Divide packed 64-bit integers in "a" by packed elements in "b", and store the remainders as packed 64-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := 64*j
+	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epu8">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Divide packed unsigned 8-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 8-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 15
+	i := 8*j
+	dst[i+7:i] := REMAINDER(a[i+7:i] / b[i+7:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epu16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Divide packed unsigned 16-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 16-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 7
+	i := 16*j
+	dst[i+15:i] := REMAINDER(a[i+15:i] / b[i+15:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epu32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_rem_epu64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Divide packed unsigned 64-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 64-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := 64*j
+	dst[i+63:i] := REMAINDER(a[i+63:i] / b[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sin_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SIN(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sin_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SIN(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sincos_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d *" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SIN(a[i+63:i])
+	MEM[mem_addr+i+63:mem_addr+i] := COS(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sincos_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128 *" varname="mem_addr" etype="FP32" memwidth="128"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, store the sine in "dst", and store the cosine into memory at "mem_addr".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SIN(a[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := COS(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sind_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the sine of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SIND(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sind_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the sine of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SIND(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sinh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SINH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_sinh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SINH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_ceil_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CEIL(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_ceil_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := CEIL(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_floor_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := FLOOR(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_floor_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := FLOOR(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_round_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ROUND(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_round_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" to the nearest integer value, and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ROUND(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_pd".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_svml_sqrt_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst". Note that this intrinsic is less efficient than "_mm_sqrt_ps".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SQRT(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_tan_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := TAN(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_tan_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := TAN(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_tand_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := TAND(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_tand_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in degrees, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := TAND(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_tanh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := TANH(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_tanh_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Trigonometry</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in "a" expressed in radians, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := TANH(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_trunc_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Truncate the packed double-precision (64-bit) floating-point elements in "a", and store the results as packed double-precision floating-point elements in "dst". This intrinsic may generate the "roundpd"/"vroundpd" instruction.</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := TRUNCATE(a[i+63:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_trunc_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Truncate the packed single-precision (32-bit) floating-point elements in "a", and store the results as packed single-precision floating-point elements in "dst". This intrinsic may generate the "roundps"/"vroundps" instruction.</description>
+	<operation>FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := TRUNCATE(a[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_udiv_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the truncated results in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_udivrem_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i *" varname="mem_addr" etype="UI32" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", store the truncated results in "dst", and store the remainders as packed unsigned 32-bit integers into memory at "mem_addr".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := TRUNCATE(a[i+31:i] / b[i+31:i])
+	MEM[mem_addr+i+31:mem_addr+i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SVML" sequence="TRUE" name="_mm_urem_epi32">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Divide packed unsigned 32-bit integers in "a" by packed elements in "b", and store the remainders as packed unsigned 32-bit integers in "dst".</description>
+	<operation>FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := REMAINDER(a[i+31:i] / b[i+31:i])
+ENDFOR
+dst[MAX:128] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_storeu_si16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI16" memwidth="16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Store 16-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+15:mem_addr] := a[15:0]
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_loadu_si64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI64" memwidth="64"/>
+	<description>Load unaligned 64-bit integer from memory into the first element of "dst".</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="MOVQ" form="xmm, m64" xed="MOVQ_XMMdq_MEMq_0F6E"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" name="_mm_storeu_si64">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI64" memwidth="64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store 64-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="m64, xmm" xed="MOVQ_MEMq_XMMq_0F7E"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE" sequence="TRUE" name="_mm_loadu_si16">
+	<type>Integer</type>
+	<CPUID>SSE</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI16" memwidth="16"/>
+	<description>Load unaligned 16-bit integer from memory into the first element of "dst".</description>
+	<operation>
+dst[15:0] := MEM[mem_addr+15:mem_addr]
+dst[MAX:16] := 0
+	</operation>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_undefined_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>General Support</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m128d with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_undefined_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>General Support</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m128i with undefined elements.</description>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_loadu_si32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="void const*" varname="mem_addr" etype="UI32" memwidth="32"/>
+	<description>Load unaligned 32-bit integer from memory into the first element of "dst".</description>
+	<operation>
+dst[31:0] := MEM[mem_addr+31:mem_addr]
+dst[MAX:32] := 0
+	</operation>
+	<instruction name="MOVD" form="xmm, m32" xed="MOVD_XMMdq_MEMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_storeu_si32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="void*" varname="mem_addr" etype="UI32" memwidth="32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Store 32-bit integer from the first element of "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+	</operation>
+	<instruction name="MOVD" form="m32, xmm" xed="MOVD_MEMd_XMMd"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_pause">
+	<CPUID>SSE2</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Provide a hint to the processor that the code sequence is a spin-wait loop. This can help improve the performance and power consumption of spin-wait loops.</description>
+	<instruction name="PAUSE" xed="PAUSE"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_clflush">
+	<CPUID>SSE2</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void const*" varname="p"/>
+	<description>Invalidate and flush the cache line that contains "p" from all levels of the cache hierarchy.</description>
+	<instruction name="CLFLUSH" form="m8" xed="CLFLUSH_MEMmprefetch"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_lfence">
+	<CPUID>SSE2</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Perform a serializing operation on all load-from-memory instructions that were issued prior to this instruction. Guarantees that every load instruction that precedes, in program order, the load fence instruction is globally visible before any load instruction which follows the fence in program order.</description>
+	<instruction name="LFENCE" xed="LFENCE"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_mfence">
+	<CPUID>SSE2</CPUID>
+	<category>General Support</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Perform a serializing operation on all load-from-memory and store-to-memory instructions that were issued prior to this instruction. Guarantees that every memory access that precedes, in program order, the memory fence instruction is globally visible before any memory instruction which follows the fence in program order.</description>
+	<instruction name="MFENCE" xed="MFENCE"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_add_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Add packed 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := a[i+7:i] + b[i+7:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDB" form="xmm, xmm" xed="PADDB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_add_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Add packed 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := a[i+15:i] + b[i+15:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDW" form="xmm, xmm" xed="PADDW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_add_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Add packed 32-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] + b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDD" form="xmm, xmm" xed="PADDD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_add_si64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Add 64-bit integers "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] + b[63:0]
+	</operation>
+	<instruction name="PADDQ" form="mm, mm" xed="PADDQ_MMXq_MMXq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_add_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Add packed 64-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="PADDQ" form="xmm, xmm" xed="PADDQ_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_adds_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Add packed signed 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDSB" form="xmm, xmm" xed="PADDSB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_adds_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Add packed signed 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDSW" form="xmm, xmm" xed="PADDSW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_adds_epu8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Add packed unsigned 8-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := SaturateU8( a[i+7:i] + b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDUSB" form="xmm, xmm" xed="PADDUSB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_adds_epu16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Add packed unsigned 16-bit integers in "a" and "b" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := SaturateU16( a[i+15:i] + b[i+15:i] )
+ENDFOR
+	</operation>
+	<instruction name="PADDUSW" form="xmm, xmm" xed="PADDUSW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_avg_epu8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Average packed unsigned 8-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) &gt;&gt; 1
+ENDFOR
+	</operation>
+	<instruction name="PAVGB" form="xmm, xmm" xed="PAVGB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_avg_epu16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Probability/Statistics</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Average packed unsigned 16-bit integers in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) &gt;&gt; 1
+ENDFOR
+	</operation>
+	<instruction name="PAVGW" form="xmm, xmm" xed="PAVGW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_madd_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := SignExtend32(a[i+31:i+16]*b[i+31:i+16]) + SignExtend32(a[i+15:i]*b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMADDWD" form="xmm, xmm" xed="PMADDWD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_max_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXSW" form="xmm, xmm" xed="PMAXSW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_max_epu8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXUB" form="xmm, xmm" xed="PMAXUB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_min_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINSW" form="xmm, xmm" xed="PMINSW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_min_epu8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed unsigned 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINUB" form="xmm, xmm" xed="PMINUB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_mulhi_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply the packed signed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+	</operation>
+	<instruction name="PMULHW" form="xmm, xmm" xed="PMULHW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_mulhi_epu16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Multiply the packed unsigned 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	tmp[31:0] := a[i+15:i] * b[i+15:i]
+	dst[i+15:i] := tmp[31:16]
+ENDFOR
+	</operation>
+	<instruction name="PMULHUW" form="xmm, xmm" xed="PMULHUW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_mullo_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Multiply the packed 16-bit integers in "a" and "b", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	tmp[31:0] := SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])
+	dst[i+15:i] := tmp[15:0]
+ENDFOR
+	</operation>
+	<instruction name="PMULLW" form="xmm, xmm" xed="PMULLW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_mul_su32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m64" varname="a" etype="UI32"/>
+	<parameter type="__m64" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from "a" and "b", and store the unsigned 64-bit result in "dst".</description>
+	<operation>
+dst[63:0] := a[31:0] * b[31:0]
+	</operation>
+	<instruction name="PMULUDQ" form="mm, mm" xed="PMULUDQ_MMXq_MMXq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_mul_epu32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Multiply the low unsigned 32-bit integers from each packed 64-bit element in "a" and "b", and store the unsigned 64-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+31:i] * b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PMULUDQ" form="xmm, xmm" xed="PMULUDQ_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sad_epu8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compute the absolute differences of packed unsigned 8-bit integers in "a" and "b", then horizontally sum each consecutive 8 differences to produce two unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
+ENDFOR
+FOR j := 0 to 1
+	i := j*64
+	dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \
+	               tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56]
+	dst[i+63:i+16] := 0
+ENDFOR
+	</operation>
+	<instruction name="PSADBW" form="xmm, xmm" xed="PSADBW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Subtract packed 8-bit integers in "b" from packed 8-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := a[i+7:i] - b[i+7:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBB" form="xmm, xmm" xed="PSUBB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Subtract packed 16-bit integers in "b" from packed 16-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := a[i+15:i] - b[i+15:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBW" form="xmm, xmm" xed="PSUBW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Subtract packed 32-bit integers in "b" from packed 32-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[i+31:i] - b[i+31:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBD" form="xmm, xmm" xed="PSUBD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_si64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<parameter type="__m64" varname="b" etype="UI64"/>
+	<description>Subtract 64-bit integer "b" from 64-bit integer "a", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] - b[63:0]
+	</operation>
+	<instruction name="PSUBQ" form="mm, mm" xed="PSUBQ_MMXq_MMXq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Subtract packed 64-bit integers in "b" from packed 64-bit integers in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="PSUBQ" form="xmm, xmm" xed="PSUBQ_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_subs_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Subtract packed signed 8-bit integers in "b" from packed signed 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := Saturate8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBSB" form="xmm, xmm" xed="PSUBSB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_subs_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Subtract packed signed 16-bit integers in "b" from packed signed 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := Saturate16(a[i+15:i] - b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PSUBSW" form="xmm, xmm" xed="PSUBSW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_subs_epu8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Subtract packed unsigned 8-bit integers in "b" from packed unsigned 8-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := SaturateU8(a[i+7:i] - b[i+7:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBUSB" form="xmm, xmm" xed="PSUBUSB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_subs_epu16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Subtract packed unsigned 16-bit integers in "b" from packed unsigned 16-bit integers in "a" using saturation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := SaturateU16(a[i+15:i] - b[i+15:i])	
+ENDFOR
+	</operation>
+	<instruction name="PSUBUSW" form="xmm, xmm" xed="PSUBUSW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_slli_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &lt;&lt; (tmp*8)
+	</operation>
+	<instruction name="PSLLDQ" form="xmm, imm8" xed="PSLLDQ_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_bslli_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift "a" left by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &lt;&lt; (tmp*8)
+	</operation>
+	<instruction name="PSLLDQ" form="xmm, imm8" xed="PSLLDQ_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_bsrli_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &gt;&gt; (tmp*8)
+	</operation>
+	<instruction name="PSRLDQ" form="xmm, imm8" xed="PSRLDQ_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_slli_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLW" form="xmm, imm8" xed="PSLLW_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sll_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLW" form="xmm, xmm" xed="PSLLW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_slli_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLD" form="xmm, imm8" xed="PSLLD_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sll_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLD" form="xmm, xmm" xed="PSLLD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_slli_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLQ" form="xmm, imm8" xed="PSLLQ_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sll_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &lt;&lt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSLLQ" form="xmm, xmm" xed="PSLLQ_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srai_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAW" form="xmm, imm8" xed="PSRAW_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sra_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+	ELSE
+		dst[i+15:i] := SignExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAW" form="xmm, xmm" xed="PSRAW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srai_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAD" form="xmm, imm8" xed="PSRAD_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sra_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in sign bits, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+	ELSE
+		dst[i+31:i] := SignExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRAD" form="xmm, xmm" xed="PSRAD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srli_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+tmp := imm8[7:0]
+IF tmp &gt; 15
+	tmp := 16
+FI
+dst[127:0] := a[127:0] &gt;&gt; (tmp*8)
+	</operation>
+	<instruction name="PSRLDQ" form="xmm, imm8" xed="PSRLDQ_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srli_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 16-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF imm8[7:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLW" form="xmm, imm8" xed="PSRLW_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srl_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="count" etype="UI16"/>
+	<description>Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF count[63:0] &gt; 15
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := ZeroExtend16(a[i+15:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLW" form="xmm, xmm" xed="PSRLW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srli_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF imm8[7:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLD" form="xmm, imm8" xed="PSRLD_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srl_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="count" etype="UI32"/>
+	<description>Shift packed 32-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF count[63:0] &gt; 31
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := ZeroExtend32(a[i+31:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLD" form="xmm, xmm" xed="PSRLD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srli_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF imm8[7:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; imm8[7:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLQ" form="xmm, imm8" xed="PSRLQ_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_srl_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Shift</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="count" etype="UI64"/>
+	<description>Shift packed 64-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF count[63:0] &gt; 63
+		dst[i+63:i] := 0
+	ELSE
+		dst[i+63:i] := ZeroExtend64(a[i+63:i] &gt;&gt; count[63:0])
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSRLQ" form="xmm, xmm" xed="PSRLQ_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_and_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[127:0] := (a[127:0] AND b[127:0])
+	</operation>
+	<instruction name="PAND" form="xmm, xmm" xed="PAND_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_andnot_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise NOT of 128 bits (representing integer data) in "a" and then AND with "b", and store the result in "dst".</description>
+	<operation>
+dst[127:0] := ((NOT a[127:0]) AND b[127:0])
+	</operation>
+	<instruction name="PANDN" form="xmm, xmm" xed="PANDN_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_or_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise OR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[127:0] := (a[127:0] OR b[127:0])
+	</operation>
+	<instruction name="POR" form="xmm, xmm" xed="POR_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_xor_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise XOR of 128 bits (representing integer data) in "a" and "b", and store the result in "dst".</description>
+	<operation>
+dst[127:0] := (a[127:0] XOR b[127:0])
+	</operation>
+	<instruction name="PXOR" form="xmm, xmm" xed="PXOR_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpeq_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Compare packed 8-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQB" form="xmm, xmm" xed="PCMPEQB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpeq_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQW" form="xmm, xmm" xed="PCMPEQW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpeq_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed 32-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQD" form="xmm, xmm" xed="PCMPEQD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpgt_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] &gt; b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTB" form="xmm, xmm" xed="PCMPGTB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpgt_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] &gt; b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTW" form="xmm, xmm" xed="PCMPGTW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpgt_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &gt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTD" form="xmm, xmm" xed="PCMPGTD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmplt_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtb instruction with the order of the operands switched.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := ( a[i+7:i] &lt; b[i+7:i] ) ? 0xFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTB" form="xmm, xmm" xed="PCMPGTB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmplt_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Compare packed signed 16-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtw instruction with the order of the operands switched.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := ( a[i+15:i] &lt; b[i+15:i] ) ? 0xFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTW" form="xmm, xmm" xed="PCMPGTW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmplt_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b" for less-than, and store the results in "dst". Note: This intrinsic emits the pcmpgtd instruction with the order of the operands switched.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ( a[i+31:i] &lt; b[i+31:i] ) ? 0xFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTD" form="xmm, xmm" xed="PCMPGTD_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtepi32_pd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTDQ2PD" form="xmm, xmm" xed="CVTDQ2PD_XMMpd_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi32_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="b" etype="SI32"/>
+	<description>Convert the signed 32-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int32_To_FP64(b[31:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="CVTSI2SD" form="xmm, r32" xed="CVTSI2SD_XMMsd_GPR32d"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi64_sd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<description>Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="CVTSI2SD" form="xmm, r64" xed="CVTSI2SD_XMMsd_GPR64q"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi64x_sd">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__int64" varname="b" etype="SI64"/>
+	<description>Convert the signed 64-bit integer "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_Int64_To_FP64(b[63:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="CVTSI2SD" form="xmm, r64" xed="CVTSI2SD_XMMsd_GPR64q"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtepi32_ps">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := Convert_Int32_To_FP32(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTDQ2PS" form="xmm, xmm" xed="CVTDQ2PS_XMMps_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_cvtpi32_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	m := j*64
+	dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTPI2PD" form="xmm, mm" xed="CVTPI2PD_XMMpd_MMXq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi32_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Copy 32-bit integer "a" to the lower elements of "dst", and zero the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+dst[127:32] := 0
+	</operation>
+	<instruction name="MOVD" form="xmm, r32" xed="MOVD_XMMdq_GPR32"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi64_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element.</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := 0
+	</operation>
+	<instruction name="MOVQ" form="xmm, r64" xed="MOVQ_XMMdq_GPR64"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi64x_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Copy 64-bit integer "a" to the lower element of "dst", and zero the upper element.</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := 0
+	</operation>
+	<instruction name="MOVQ" form="xmm, r64" xed="MOVQ_XMMdq_GPR64"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi128_si32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Copy the lower 32-bit integer in "a" to "dst".</description>
+	<operation>
+dst[31:0] := a[31:0]
+	</operation>
+	<instruction name="MOVD" form="r32, xmm" xed="MOVD_GPR32_XMMd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi128_si64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Copy the lower 64-bit integer in "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="r64, xmm" xed="MOVQ_GPR64_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsi128_si64x">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Copy the lower 64-bit integer in "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="r64, xmm" xed="MOVQ_GPR64_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="e1" etype="UI64"/>
+	<parameter type="__m64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_epi64x">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="e1" etype="UI64"/>
+	<parameter type="__int64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="e3" etype="UI32"/>
+	<parameter type="int" varname="e2" etype="UI32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[31:0] := e0
+dst[63:32] := e1
+dst[95:64] := e2
+dst[127:96] := e3
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="e7" etype="UI16"/>
+	<parameter type="short" varname="e6" etype="UI16"/>
+	<parameter type="short" varname="e5" etype="UI16"/>
+	<parameter type="short" varname="e4" etype="UI16"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[15:0] := e0
+dst[31:16] := e1
+dst[47:32] := e2
+dst[63:48] := e3
+dst[79:64] := e4
+dst[95:80] := e5
+dst[111:96] := e6
+dst[127:112] := e7
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="e15" etype="UI8"/>
+	<parameter type="char" varname="e14" etype="UI8"/>
+	<parameter type="char" varname="e13" etype="UI8"/>
+	<parameter type="char" varname="e12" etype="UI8"/>
+	<parameter type="char" varname="e11" etype="UI8"/>
+	<parameter type="char" varname="e10" etype="UI8"/>
+	<parameter type="char" varname="e9" etype="UI8"/>
+	<parameter type="char" varname="e8" etype="UI8"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values.</description>
+	<operation>
+dst[7:0] := e0
+dst[15:8] := e1
+dst[23:16] := e2
+dst[31:24] := e3
+dst[39:32] := e4
+dst[47:40] := e5
+dst[55:48] := e6
+dst[63:56] := e7
+dst[71:64] := e8
+dst[79:72] := e9
+dst[87:80] := e10
+dst[95:88] := e11
+dst[103:96] := e12
+dst[111:104] := e13
+dst[119:112] := e14
+dst[127:120] := e15
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set1_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set1_epi64x">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Broadcast 64-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastq".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set1_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Broadcast 32-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastd".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := a[31:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set1_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="a" etype="UI16"/>
+	<description>Broadcast 16-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastw".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := a[15:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set1_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="a" etype="UI8"/>
+	<description>Broadcast 8-bit integer "a" to all elements of "dst". This intrinsic may generate "vpbroadcastb".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := a[7:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_setr_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="e1" etype="UI64"/>
+	<parameter type="__m64" varname="e0" etype="UI64"/>
+	<description>Set packed 64-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[63:0] := e1
+dst[127:64] := e0
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_setr_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="int" varname="e3" etype="UI32"/>
+	<parameter type="int" varname="e2" etype="UI32"/>
+	<parameter type="int" varname="e1" etype="UI32"/>
+	<parameter type="int" varname="e0" etype="UI32"/>
+	<description>Set packed 32-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[31:0] := e3
+dst[63:32] := e2
+dst[95:64] := e1
+dst[127:96] := e0
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_setr_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="short" varname="e7" etype="UI16"/>
+	<parameter type="short" varname="e6" etype="UI16"/>
+	<parameter type="short" varname="e5" etype="UI16"/>
+	<parameter type="short" varname="e4" etype="UI16"/>
+	<parameter type="short" varname="e3" etype="UI16"/>
+	<parameter type="short" varname="e2" etype="UI16"/>
+	<parameter type="short" varname="e1" etype="UI16"/>
+	<parameter type="short" varname="e0" etype="UI16"/>
+	<description>Set packed 16-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[15:0] := e7
+dst[31:16] := e6
+dst[47:32] := e5
+dst[63:48] := e4
+dst[79:64] := e3
+dst[95:80] := e2
+dst[111:96] := e1
+dst[127:112] := e0
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_setr_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="char" varname="e15" etype="UI8"/>
+	<parameter type="char" varname="e14" etype="UI8"/>
+	<parameter type="char" varname="e13" etype="UI8"/>
+	<parameter type="char" varname="e12" etype="UI8"/>
+	<parameter type="char" varname="e11" etype="UI8"/>
+	<parameter type="char" varname="e10" etype="UI8"/>
+	<parameter type="char" varname="e9" etype="UI8"/>
+	<parameter type="char" varname="e8" etype="UI8"/>
+	<parameter type="char" varname="e7" etype="UI8"/>
+	<parameter type="char" varname="e6" etype="UI8"/>
+	<parameter type="char" varname="e5" etype="UI8"/>
+	<parameter type="char" varname="e4" etype="UI8"/>
+	<parameter type="char" varname="e3" etype="UI8"/>
+	<parameter type="char" varname="e2" etype="UI8"/>
+	<parameter type="char" varname="e1" etype="UI8"/>
+	<parameter type="char" varname="e0" etype="UI8"/>
+	<description>Set packed 8-bit integers in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[7:0] := e15
+dst[15:8] := e14
+dst[23:16] := e13
+dst[31:24] := e12
+dst[39:32] := e11
+dst[47:40] := e10
+dst[55:48] := e9
+dst[63:56] := e8
+dst[71:64] := e7
+dst[79:72] := e6
+dst[87:80] := e5
+dst[95:88] := e4
+dst[103:96] := e3
+dst[111:104] := e2
+dst[119:112] := e1
+dst[127:120] := e0
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_setzero_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<description>Return vector of type __m128i with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="PXOR" form="xmm, xmm" xed="PXOR_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_loadl_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i const*" varname="mem_addr" etype="UI64" memwidth="64"/>
+	<description>Load 64-bit integer from memory into the first element of "dst".</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[MAX:64] := 0
+	</operation>
+	<instruction name="MOVQ" form="xmm, m64" xed="MOVQ_XMMdq_MEMq_0F7E"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_load_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i const*" varname="mem_addr" etype="M128" memwidth="128"/>
+	<description>Load 128-bits of integer data from memory into "dst". 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVDQA" form="xmm, m128" xed="MOVDQA_XMMdq_MEMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_loadu_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i const*" varname="mem_addr" etype="M128" memwidth="128"/>
+	<description>Load 128-bits of integer data from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVDQU" form="xmm, m128" xed="MOVDQU_XMMdq_MEMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_maskmoveu_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="mask" etype="UI8"/>
+	<parameter type="char*" varname="mem_addr" etype="UI8" memwidth="128"/>
+	<description>Conditionally store 8-bit integer elements from "a" into memory using "mask" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF mask[i+7]
+		MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="MASKMOVDQU" form="xmm, xmm" xed="MASKMOVDQU_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_store_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m128i*" varname="mem_addr" etype="M128" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Store 128-bits of integer data from "a" into memory. 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVDQA" form="m128, xmm" xed="MOVDQA_MEMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_storeu_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m128i*" varname="mem_addr" etype="M128" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Store 128-bits of integer data from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVDQU" form="m128, xmm" xed="MOVDQU_MEMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_storel_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m128i*" varname="mem_addr" etype="UI64" memwidth="64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Store 64-bit integer from the first element of "a" into memory.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="MOVQ" form="m64, xmm" xed="MOVQ_MEMq_XMMq_0F7E"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_stream_si128">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__m128i*" varname="mem_addr" etype="M128" memwidth="128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Store 128-bits of integer data from "a" into memory using a non-temporal memory hint. 
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVNTDQ" form="m128, xmm" xed="MOVNTDQ_MEMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_stream_si32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="int*" varname="mem_addr" etype="UI32" memwidth="32"/>
+	<parameter type="int" varname="a" etype="UI32"/>
+	<description>Store 32-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated.</description>
+	<operation>
+MEM[mem_addr+31:mem_addr] := a[31:0]
+	</operation>
+	<instruction name="MOVNTI" form="m32, r32" xed="MOVNTI_MEMd_GPR32"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_stream_si64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="__int64*" varname="mem_addr" etype="UI64" memwidth="64"/>
+	<parameter type="__int64" varname="a" etype="UI64"/>
+	<description>Store 64-bit integer "a" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address "mem_addr" is already in the cache, the cache will be updated.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="MOVNTI" form="m64, r64" xed="MOVNTI_MEMq_GPR64"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_movepi64_pi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Copy the lower 64-bit integer in "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVDQ2Q" form="mm, xmm" xed="MOVDQ2Q_MMXq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_movpi64_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m64" varname="a" etype="UI64"/>
+	<description>Copy the 64-bit integer "a" to the lower element of "dst", and zero the upper element.</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := 0
+	</operation>
+	<instruction name="MOVQ2DQ" form="xmm, mm" xed="MOVQ2DQ_XMMdq_MMXq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_move_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Move</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Copy the lower 64-bit integer in "a" to the lower element of "dst", and zero the upper element.</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := 0
+	</operation>
+	<instruction name="MOVQ" form="xmm, xmm" xed="MOVQ_XMMdq_XMMq_0F7E"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_packs_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="SI8"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := Saturate8(a[15:0])
+dst[15:8] := Saturate8(a[31:16])
+dst[23:16] := Saturate8(a[47:32])
+dst[31:24] := Saturate8(a[63:48])
+dst[39:32] := Saturate8(a[79:64])
+dst[47:40] := Saturate8(a[95:80])
+dst[55:48] := Saturate8(a[111:96])
+dst[63:56] := Saturate8(a[127:112])
+dst[71:64] := Saturate8(b[15:0])
+dst[79:72] := Saturate8(b[31:16])
+dst[87:80] := Saturate8(b[47:32])
+dst[95:88] := Saturate8(b[63:48])
+dst[103:96] := Saturate8(b[79:64])
+dst[111:104] := Saturate8(b[95:80])
+dst[119:112] := Saturate8(b[111:96])
+dst[127:120] := Saturate8(b[127:112])
+	</operation>
+	<instruction name="PACKSSWB" form="xmm, xmm" xed="PACKSSWB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_packs_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using signed saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:0])
+dst[31:16] := Saturate16(a[63:32])
+dst[47:32] := Saturate16(a[95:64])
+dst[63:48] := Saturate16(a[127:96])
+dst[79:64] := Saturate16(b[31:0])
+dst[95:80] := Saturate16(b[63:32])
+dst[111:96] := Saturate16(b[95:64])
+dst[127:112] := Saturate16(b[127:96])
+	</operation>
+	<instruction name="PACKSSDW" form="xmm, xmm" xed="PACKSSDW_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_packus_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Convert packed signed 16-bit integers from "a" and "b" to packed 8-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[7:0] := SaturateU8(a[15:0])
+dst[15:8] := SaturateU8(a[31:16])
+dst[23:16] := SaturateU8(a[47:32])
+dst[31:24] := SaturateU8(a[63:48])
+dst[39:32] := SaturateU8(a[79:64])
+dst[47:40] := SaturateU8(a[95:80])
+dst[55:48] := SaturateU8(a[111:96])
+dst[63:56] := SaturateU8(a[127:112])
+dst[71:64] := SaturateU8(b[15:0])
+dst[79:72] := SaturateU8(b[31:16])
+dst[87:80] := SaturateU8(b[47:32])
+dst[95:88] := SaturateU8(b[63:48])
+dst[103:96] := SaturateU8(b[79:64])
+dst[111:104] := SaturateU8(b[95:80])
+dst[119:112] := SaturateU8(b[111:96])
+dst[127:120] := SaturateU8(b[127:112])
+	</operation>
+	<instruction name="PACKUSWB" form="xmm, xmm" xed="PACKUSWB_XMMdq_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_extract_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Extract a 16-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst".</description>
+	<operation>
+dst[15:0] := (a[127:0] &gt;&gt; (imm8[2:0] * 16))[15:0]
+dst[31:16] := 0
+	</operation>
+	<instruction name="PEXTRW" form="r32, xmm, imm8" xed="PEXTRW_GPR32_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_insert_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="i" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="3"/>
+	<description>Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[127:0] := a[127:0]
+sel := imm8[2:0]*16
+dst[sel+15:sel] := i[15:0]
+	</operation>
+	<instruction name="PINSRW" form="xmm, r32, imm8" xed="PINSRW_XMMdq_GPR32_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_movemask_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="MASK"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Create mask from the most significant bit of each 8-bit element in "a", and store the result in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[j] := a[i+7]
+ENDFOR
+dst[MAX:16] := 0
+	</operation>
+	<instruction name="PMOVMSKB" form="r32, xmm" xed="PMOVMSKB_GPR32_XMMdq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_shuffle_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 32-bit integers in "a" using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+DEFINE SELECT4(src, control) {
+	CASE(control[1:0]) OF
+	0:	tmp[31:0] := src[31:0]
+	1:	tmp[31:0] := src[63:32]
+	2:	tmp[31:0] := src[95:64]
+	3:	tmp[31:0] := src[127:96]
+	ESAC
+	RETURN tmp[31:0]
+}
+dst[31:0] := SELECT4(a[127:0], imm8[1:0])
+dst[63:32] := SELECT4(a[127:0], imm8[3:2])
+dst[95:64] := SELECT4(a[127:0], imm8[5:4])
+dst[127:96] := SELECT4(a[127:0], imm8[7:6])
+	</operation>
+	<instruction name="PSHUFD" form="xmm, xmm, imm8" xed="PSHUFD_XMMdq_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_shufflehi_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[79:64] := (a &gt;&gt; (imm8[1:0] * 16))[79:64]
+dst[95:80] := (a &gt;&gt; (imm8[3:2] * 16))[79:64]
+dst[111:96] := (a &gt;&gt; (imm8[5:4] * 16))[79:64]
+dst[127:112] := (a &gt;&gt; (imm8[7:6] * 16))[79:64]
+	</operation>
+	<instruction name="PSHUFHW" form="xmm, xmm, imm8" xed="PSHUFHW_XMMdq_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_shufflelo_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from "a" to "dst".</description>
+	<operation>
+dst[15:0] := (a &gt;&gt; (imm8[1:0] * 16))[15:0]
+dst[31:16] := (a &gt;&gt; (imm8[3:2] * 16))[15:0]
+dst[47:32] := (a &gt;&gt; (imm8[5:4] * 16))[15:0]
+dst[63:48] := (a &gt;&gt; (imm8[7:6] * 16))[15:0]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="PSHUFLW" form="xmm, xmm, imm8" xed="PSHUFLW_XMMdq_XMMdq_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpackhi_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[71:64] 
+	dst[15:8] := src2[71:64] 
+	dst[23:16] := src1[79:72] 
+	dst[31:24] := src2[79:72] 
+	dst[39:32] := src1[87:80] 
+	dst[47:40] := src2[87:80] 
+	dst[55:48] := src1[95:88] 
+	dst[63:56] := src2[95:88] 
+	dst[71:64] := src1[103:96] 
+	dst[79:72] := src2[103:96] 
+	dst[87:80] := src1[111:104] 
+	dst[95:88] := src2[111:104] 
+	dst[103:96] := src1[119:112] 
+	dst[111:104] := src2[119:112] 
+	dst[119:112] := src1[127:120] 
+	dst[127:120] := src2[127:120] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKHBW" form="xmm, xmm" xed="PUNPCKHBW_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpackhi_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[79:64]
+	dst[31:16] := src2[79:64] 
+	dst[47:32] := src1[95:80] 
+	dst[63:48] := src2[95:80] 
+	dst[79:64] := src1[111:96] 
+	dst[95:80] := src2[111:96] 
+	dst[111:96] := src1[127:112] 
+	dst[127:112] := src2[127:112] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKHWD" form="xmm, xmm" xed="PUNPCKHWD_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpackhi_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[95:64] 
+	dst[63:32] := src2[95:64] 
+	dst[95:64] := src1[127:96] 
+	dst[127:96] := src2[127:96] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKHDQ" form="xmm, xmm" xed="PUNPCKHDQ_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpackhi_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKHQDQ" form="xmm, xmm" xed="PUNPCKHQDQ_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpacklo_epi8">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Unpack and interleave 8-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
+	dst[7:0] := src1[7:0] 
+	dst[15:8] := src2[7:0] 
+	dst[23:16] := src1[15:8] 
+	dst[31:24] := src2[15:8] 
+	dst[39:32] := src1[23:16] 
+	dst[47:40] := src2[23:16] 
+	dst[55:48] := src1[31:24] 
+	dst[63:56] := src2[31:24] 
+	dst[71:64] := src1[39:32]
+	dst[79:72] := src2[39:32] 
+	dst[87:80] := src1[47:40] 
+	dst[95:88] := src2[47:40] 
+	dst[103:96] := src1[55:48] 
+	dst[111:104] := src2[55:48] 
+	dst[119:112] := src1[63:56] 
+	dst[127:120] := src2[63:56] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKLBW" form="xmm, xmm" xed="PUNPCKLBW_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpacklo_epi16">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Unpack and interleave 16-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
+	dst[15:0] := src1[15:0] 
+	dst[31:16] := src2[15:0] 
+	dst[47:32] := src1[31:16] 
+	dst[63:48] := src2[31:16] 
+	dst[79:64] := src1[47:32] 
+	dst[95:80] := src2[47:32] 
+	dst[111:96] := src1[63:48] 
+	dst[127:112] := src2[63:48] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKLWD" form="xmm, xmm" xed="PUNPCKLWD_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpacklo_epi32">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Unpack and interleave 32-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
+	dst[31:0] := src1[31:0] 
+	dst[63:32] := src2[31:0] 
+	dst[95:64] := src1[63:32] 
+	dst[127:96] := src2[63:32] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKLDQ" form="xmm, xmm" xed="PUNPCKLDQ_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpacklo_epi64">
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Unpack and interleave 64-bit integers from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="PUNPCKLQDQ" form="xmm, xmm" xed="PUNPCKLQDQ_XMMdq_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_add_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Add the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] + b[63:0]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="ADDSD" form="xmm, xmm" xed="ADDSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_add_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Add packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] + b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="ADDPD" form="xmm, xmm" xed="ADDPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_div_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Divide the lower double-precision (64-bit) floating-point element in "a" by the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] / b[63:0]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="DIVSD" form="xmm, xmm" xed="DIVSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_div_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Divide packed double-precision (64-bit) floating-point elements in "a" by packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	dst[i+63:i] := a[i+63:i] / b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="DIVPD" form="xmm, xmm" xed="DIVPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_max_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := MAX(a[63:0], b[63:0])
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="MAXSD" form="xmm, xmm" xed="MAXSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_max_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := MAX(a[i+63:i], b[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="MAXPD" form="xmm, xmm" xed="MAXPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_min_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := MIN(a[63:0], b[63:0])
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="MINSD" form="xmm, xmm" xed="MINSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_min_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := MIN(a[i+63:i], b[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="MINPD" form="xmm, xmm" xed="MINPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_mul_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Multiply the lower double-precision (64-bit) floating-point element in "a" and "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] * b[63:0]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="MULSD" form="xmm, xmm" xed="MULSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_mul_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Multiply packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] * b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="MULPD" form="xmm, xmm" xed="MULPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sqrt_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the square root of the lower double-precision (64-bit) floating-point element in "b", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := SQRT(b[63:0])
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="SQRTSD" form="xmm, xmm" xed="SQRTSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sqrt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Elementary Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SQRT(a[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="SQRTPD" form="xmm, xmm" xed="SQRTPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Subtract the lower double-precision (64-bit) floating-point element in "b" from the lower double-precision (64-bit) floating-point element in "a", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] - b[63:0]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="SUBSD" form="xmm, xmm" xed="SUBSD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_sub_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Subtract packed double-precision (64-bit) floating-point elements in "b" from packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] - b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="SUBPD" form="xmm, xmm" xed="SUBPD_XMMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_and_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] AND b[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="ANDPD" form="xmm, xmm" xed="ANDPD_XMMxuq_XMMxuq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_andnot_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in "a" and then AND with "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="ANDNPD" form="xmm, xmm" xed="ANDNPD_XMMxuq_XMMxuq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_or_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] OR b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="ORPD" form="xmm, xmm" xed="ORPD_XMMxuq_XMMxuq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_xor_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Logical</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in "a" and "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[i+63:i] XOR b[i+63:i]
+ENDFOR
+	</operation>
+	<instruction name="XORPD" form="xmm, xmm" xed="XORPD_XMMxuq_XMMxuq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpeq_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for equality, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] == b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmplt_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] &lt; b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmple_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] &lt;= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpgt_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] &gt; b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpge_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] &gt;= b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpord_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>dst[63:0] := (a[63:0] != NaN AND b[63:0] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpunord_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>dst[63:0] := (a[63:0] == NaN OR b[63:0] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpneq_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (a[63:0] != b[63:0]) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpnlt_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (!(a[63:0] &lt; b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpnle_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (!(a[63:0] &lt;= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpngt_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (!(a[63:0] &gt; b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpnge_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := (!(a[63:0] &gt;= b[63:0])) ? 0xFFFFFFFFFFFFFFFF : 0
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="CMPSD" form="xmm, xmm, imm8" xed="CMPSD_XMM_XMMsd_XMMsd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpeq_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] == b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmplt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] &lt; b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmple_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] &lt;= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpgt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] &gt; b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpge_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] &gt;= b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpord_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] != NaN AND b[i+63:i] != NaN) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpunord_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] == NaN OR b[i+63:i] == NaN) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpneq_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (a[i+63:i] != b[i+63:i]) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpnlt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (!(a[i+63:i] &lt; b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpnle_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (!(a[i+63:i] &lt;= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpngt_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (!(a[i+63:i] &gt; b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cmpnge_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := (!(a[i+63:i] &gt;= b[i+63:i])) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="CMPPD" form="xmm, xmm, imm8" xed="CMPPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_comieq_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[63:0] == b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISD" form="xmm, xmm" xed="COMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_comilt_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[63:0] &lt; b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISD" form="xmm, xmm" xed="COMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_comile_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[63:0] &lt;= b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISD" form="xmm, xmm" xed="COMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_comigt_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[63:0] &gt; b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISD" form="xmm, xmm" xed="COMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_comige_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[63:0] &gt;= b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISD" form="xmm, xmm" xed="COMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_comineq_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1).</description>
+	<operation>
+RETURN ( a[63:0] != b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="COMISD" form="xmm, xmm" xed="COMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_ucomieq_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[63:0] == b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISD" form="xmm, xmm" xed="UCOMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_ucomilt_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[63:0] &lt; b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISD" form="xmm, xmm" xed="UCOMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_ucomile_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[63:0] &lt;= b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISD" form="xmm, xmm" xed="UCOMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_ucomigt_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[63:0] &gt; b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISD" form="xmm, xmm" xed="UCOMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_ucomige_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[63:0] &gt;= b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISD" form="xmm, xmm" xed="UCOMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_ucomineq_sd">
+	<type>Floating Point</type>
+	<type>Flag</type>
+	<CPUID>SSE2</CPUID>
+	<category>Compare</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.</description>
+	<operation>
+RETURN ( a[63:0] != b[63:0] ) ? 1 : 0
+	</operation>
+	<instruction name="UCOMISD" form="xmm, xmm" xed="UCOMISD_XMMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtpd_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k])
+ENDFOR
+dst[127:64] := 0
+	</operation>
+	<instruction name="CVTPD2PS" form="xmm, xmm" xed="CVTPD2PS_XMMps_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtps_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k])
+ENDFOR
+	</operation>
+	<instruction name="CVTPS2PD" form="xmm, xmm" xed="CVTPS2PD_XMMpd_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k])
+ENDFOR
+	</operation>
+	<instruction name="CVTPD2DQ" form="xmm, xmm" xed="CVTPD2DQ_XMMdq_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsd_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32(a[63:0])
+	</operation>
+	<instruction name="CVTSD2SI" form="r32, xmm" xed="CVTSD2SI_GPR32d_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsd_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64(a[63:0])
+	</operation>
+	<instruction name="CVTSD2SI" form="r64, xmm" xed="CVTSD2SI_GPR64q_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsd_si64x">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64(a[63:0])
+	</operation>
+	<instruction name="CVTSD2SI" form="r64, xmm" xed="CVTSD2SI_GPR64q_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsd_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_FP32(b[63:0])
+dst[127:32] := a[127:32]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="CVTSD2SS" form="xmm, xmm" xed="CVTSD2SS_XMMss_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtsd_f64">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="double" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Copy the lower double-precision (64-bit) floating-point element of "a" to "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+	</operation>
+	<instruction name="MOVSD" form="m64, xmm" xed="MOVSD_XMM_MEMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtss_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP32_To_FP64(b[31:0])
+dst[127:64] := a[127:64]
+dst[MAX:128] := 0
+	</operation>
+	<instruction name="CVTSS2SD" form="xmm, xmm" xed="CVTSS2SD_XMMsd_XMMss"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvttpd_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k])
+ENDFOR
+	</operation>
+	<instruction name="CVTTPD2DQ" form="xmm, xmm" xed="CVTTPD2DQ_XMMdq_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvttsd_si32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0])
+	</operation>
+	<instruction name="CVTTSD2SI" form="r32, xmm" xed="CVTTSD2SI_GPR32d_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvttsd_si64">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0])
+	</operation>
+	<instruction name="CVTTSD2SI" form="r64, xmm" xed="CVTTSD2SI_GPR64q_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvttsd_si64x">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst".</description>
+	<operation>
+dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0])
+	</operation>
+	<instruction name="CVTTSD2SI" form="r64, xmm" xed="CVTTSD2SI_GPR64q_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvtps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTPS2DQ" form="xmm, xmm" xed="CVTPS2DQ_XMMdq_XMMps"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_cvttps_epi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="CVTTPS2DQ" form="xmm, xmm" xed="CVTTPS2DQ_XMMdq_XMMps"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_cvtpd_pi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k])
+ENDFOR
+	</operation>
+	<instruction name="CVTPD2PI" form="mm, xmm" xed="CVTPD2PI_MMXq_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_cvttpd_pi32">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Convert</category>
+	<return type="__m64" varname="dst" etype="UI32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 32*j
+	k := 64*j
+	dst[i+31:i] := Convert_FP64_To_Int32_Truncate(a[k+63:k])
+ENDFOR
+	</operation>
+	<instruction name="CVTTPD2PI" form="mm, xmm" xed="CVTTPD2PI_MMXq_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Copy double-precision (64-bit) floating-point element "a" to the lower element of "dst", and zero the upper element.</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := 0
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set1_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_pd1">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="a" etype="FP64"/>
+	<description>Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := a[63:0]
+ENDFOR
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_set_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="e1" etype="FP64"/>
+	<parameter type="double" varname="e0" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values.</description>
+	<operation>
+dst[63:0] := e0
+dst[127:64] := e1
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_setr_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double" varname="e1" etype="FP64"/>
+	<parameter type="double" varname="e0" etype="FP64"/>
+	<description>Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order.</description>
+	<operation>
+dst[63:0] := e1
+dst[127:64] := e0
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_setzero_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Set</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="void"/>
+	<description>Return vector of type __m128d with all elements set to zero.</description>
+	<operation>
+dst[MAX:0] := 0
+	</operation>
+	<instruction name="XORPD" form="xmm, xmm" xed="XORPD_XMMxuq_XMMxuq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_load_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst".
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVAPD" form="xmm, m128" xed="MOVAPD_XMMpd_MEMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_load1_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into both elements of "dst".</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[127:64] := MEM[mem_addr+63:mem_addr]
+	</operation>
+	<instruction name="MOVAPD" form="xmm, m128" xed="MOVAPD_XMMpd_MEMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_load_pd1">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into both elements of "dst".</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[127:64] := MEM[mem_addr+63:mem_addr]
+	</operation>
+	<instruction name="MOVAPD" form="xmm, m128" xed="MOVAPD_XMMpd_MEMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_loadr_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load 2 double-precision (64-bit) floating-point elements from memory into "dst" in reverse order. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+127:mem_addr+64]
+dst[127:64] := MEM[mem_addr+63:mem_addr]
+	</operation>
+	<instruction name="MOVAPD" form="xmm, m128" xed="MOVAPD_XMMpd_MEMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_loadu_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<description>Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into "dst".
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVUPD" form="xmm, m128" xed="MOVUPD_XMMpd_MEMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_load_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and zero the upper element. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[127:64] := 0
+	</operation>
+	<instruction name="MOVSD" form="xmm, m64" xed="MOVSD_XMM_XMMdq_MEMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_loadh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into the upper element of "dst", and copy the lower element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := MEM[mem_addr+63:mem_addr]
+	</operation>
+	<instruction name="MOVHPD" form="xmm, m64" xed="MOVHPD_XMMsd_MEMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_loadl_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and copy the upper element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="MOVLPD" form="xmm, m64" xed="MOVLPD_XMMsd_MEMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_stream_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVNTPD" form="m128, xmm" xed="MOVNTPD_MEMdq_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_store_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store the lower double-precision (64-bit) floating-point element from "a" into memory. "mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="MOVSD" form="m64, xmm" xed="MOVSD_XMM_MEMsd_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_store1_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+MEM[mem_addr+127:mem_addr+64] := a[63:0]
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_store_pd1">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store the lower double-precision (64-bit) floating-point element from "a" into 2 contiguous elements in memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+MEM[mem_addr+127:mem_addr+64] := a[63:0]
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_store_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVAPD" form="m128, xmm" xed="MOVAPD_MEMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_storeu_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory.
+	"mem_addr" does not need to be aligned on any particular boundary.</description>
+	<operation>
+MEM[mem_addr+127:mem_addr] := a[127:0]
+	</operation>
+	<instruction name="MOVUPD" form="m128, xmm" xed="MOVUPD_MEMpd_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" sequence="TRUE" name="_mm_storer_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="128"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store 2 double-precision (64-bit) floating-point elements from "a" into memory in reverse order.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[127:64]
+MEM[mem_addr+127:mem_addr+64] := a[63:0]
+	</operation>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_storeh_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store the upper double-precision (64-bit) floating-point element from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[127:64]
+	</operation>
+	<instruction name="MOVHPD" form="m64, xmm" xed="MOVHPD_MEMq_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_storel_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Store</category>
+	<return type="void"/>
+	<parameter type="double*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Store the lower double-precision (64-bit) floating-point element from "a" into memory.</description>
+	<operation>
+MEM[mem_addr+63:mem_addr] := a[63:0]
+	</operation>
+	<instruction name="MOVLPD" form="m64, xmm" xed="MOVLPD_MEMq_XMMsd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpackhi_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[127:64] 
+	dst[127:64] := src2[127:64] 
+	RETURN dst[127:0]	
+}
+dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="UNPCKHPD" form="xmm, xmm" xed="UNPCKHPD_XMMpd_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_unpacklo_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst".</description>
+	<operation>
+DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
+	dst[63:0] := src1[63:0] 
+	dst[127:64] := src2[63:0] 
+	RETURN dst[127:0]
+}
+dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
+	</operation>
+	<instruction name="UNPCKLPD" form="xmm, xmm" xed="UNPCKLPD_XMMpd_XMMq"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_movemask_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Miscellaneous</category>
+	<return type="int" varname="dst" etype="MASK"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF a[i+63]
+		dst[j] := 1
+	ELSE
+		dst[j] := 0
+	FI
+ENDFOR
+dst[MAX:2] := 0
+	</operation>
+	<instruction name="MOVMSKPD" form="r32, xmm" xed="MOVMSKPD_GPR32_XMMpd"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_shuffle_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+	</operation>
+	<instruction name="SHUFPD" form="xmm, xmm, imm8" xed="SHUFPD_XMMpd_XMMpd_IMMb"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" vexEq="TRUE" name="_mm_move_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := b[63:0]
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="MOVSD" form="xmm, xmm" xed="MOVSD_XMM_XMMsd_XMMsd_0F10"/>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_castpd_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Cast</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m128d to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_castpd_si128">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Cast</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Cast vector of type __m128d to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_castps_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Cast</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m128 to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_castps_si128">
+	<type>Floating Point</type>
+	<type>Integer</type>
+	<CPUID>SSE2</CPUID>
+	<category>Cast</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Cast vector of type __m128 to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_castsi128_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Cast</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<description>Cast vector of type __m128i to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE2" name="_mm_castsi128_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE2</CPUID>
+	<category>Cast</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Cast vector of type __m128i to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.</description>
+	<header>emmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_addsub_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Alternately add and subtract packed single-precision (32-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF ((j &amp; 1) == 0)
+		dst[i+31:i] := a[i+31:i] - b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i] + b[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="ADDSUBPS" form="xmm, xmm" xed="ADDSUBPS_XMMps_XMMps"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_addsub_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Alternately add and subtract packed double-precision (64-bit) floating-point elements in "a" to/from packed elements in "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF ((j &amp; 1) == 0)
+		dst[i+63:i] := a[i+63:i] - b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i] + b[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="ADDSUBPD" form="xmm, xmm" xed="ADDSUBPD_XMMpd_XMMpd"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_hadd_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[63:0] := a[127:64] + a[63:0]
+dst[127:64] := b[127:64] + b[63:0]
+	</operation>
+	<instruction name="HADDPD" form="xmm, xmm" xed="HADDPD_XMMpd_XMMpd"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_hadd_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] + a[31:0]
+dst[63:32] := a[127:96] + a[95:64]
+dst[95:64] := b[63:32] + b[31:0]
+dst[127:96] := b[127:96] + b[95:64]
+	</operation>
+	<instruction name="HADDPS" form="xmm, xmm" xed="HADDPS_XMMps_XMMps"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_hsub_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0] - a[127:64]
+dst[127:64] := b[63:0] - b[127:64]
+	</operation>
+	<instruction name="HSUBPD" form="xmm, xmm" xed="HSUBPD_XMMpd_XMMpd"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_hsub_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] - a[63:32]
+dst[63:32] := a[95:64] - a[127:96]
+dst[95:64] := b[31:0] - b[63:32]
+dst[127:96] := b[95:64] - b[127:96]
+	</operation>
+	<instruction name="HSUBPS" form="xmm, xmm" xed="HSUBPS_XMMps_XMMps"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_lddqu_si128">
+	<type>Integer</type>
+	<CPUID>SSE3</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i const*" varname="mem_addr" etype="M128" memwidth="128"/>
+	<description>Load 128-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm_loadu_si128" when the data crosses a cache line boundary.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="LDDQU" form="xmm, m128" xed="LDDQU_XMMpd_MEMdq"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_movedup_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Move</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Duplicate the low double-precision (64-bit) floating-point element from "a", and store the results in "dst".</description>
+	<operation>
+dst[63:0] := a[63:0]
+dst[127:64] := a[63:0]
+	</operation>
+	<instruction name="MOVDDUP" form="xmm, xmm" xed="MOVDDUP_XMMdq_XMMq"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_loaddup_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Load</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="double const*" varname="mem_addr" etype="FP64" memwidth="64"/>
+	<description>Load a double-precision (64-bit) floating-point element from memory into both elements of "dst".</description>
+	<operation>
+dst[63:0] := MEM[mem_addr+63:mem_addr]
+dst[127:64] := MEM[mem_addr+63:mem_addr]
+	</operation>
+	<instruction name="MOVDDUP" form="xmm, m64" xed="MOVDDUP_XMMdq_MEMq"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_movehdup_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Duplicate odd-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] 
+dst[63:32] := a[63:32]
+dst[95:64] := a[127:96] 
+dst[127:96] := a[127:96]
+	</operation>
+	<instruction name="MOVSHDUP" form="xmm, xmm" xed="MOVSHDUP_XMMps_XMMps"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE3" vexEq="TRUE" name="_mm_moveldup_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE3</CPUID>
+	<category>Move</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Duplicate even-indexed single-precision (32-bit) floating-point elements from "a", and store the results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] 
+dst[63:32] := a[31:0]
+dst[95:64] := a[95:64] 
+dst[127:96] := a[95:64]
+	</operation>
+	<instruction name="MOVSLDUP" form="xmm, xmm" xed="MOVSLDUP_XMMps_XMMps"/>
+	<header>pmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_blend_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF imm8[j]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="BLENDPD" form="xmm, xmm, imm8" xed="BLENDPD_XMMdq_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_blend_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF imm8[j]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="BLENDPS" form="xmm, xmm, imm8" xed="BLENDPS_XMMdq_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_blendv_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="__m128d" varname="mask" etype="FP64"/>
+	<description>Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	IF mask[i+63]
+		dst[i+63:i] := b[i+63:i]
+	ELSE
+		dst[i+63:i] := a[i+63:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="BLENDVPD" form="xmm, xmm" xed="BLENDVPD_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_blendv_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="__m128" varname="mask" etype="FP32"/>
+	<description>Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF mask[i+31]
+		dst[i+31:i] := b[i+31:i]
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="BLENDVPS" form="xmm, xmm" xed="BLENDVPS_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_blendv_epi8">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="__m128i" varname="mask" etype="UI8"/>
+	<description>Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF mask[i+7]
+		dst[i+7:i] := b[i+7:i]
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PBLENDVB" form="xmm, xmm" xed="PBLENDVB_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_blend_epi16">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Blend packed 16-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF imm8[j]
+		dst[i+15:i] := b[i+15:i]
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PBLENDW" form="xmm, xmm, imm8" xed="PBLENDW_XMMdq_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_dp_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Conditionally multiply the packed double-precision (64-bit) floating-point elements in "a" and "b" using bits 5:4 of "imm8", sum the two products, and conditionally store the sum in "dst" using bits 1:0 of "imm8".</description>
+	<operation>
+DEFINE DP(a[127:0], b[127:0], imm8[7:0]) {
+	FOR j := 0 to 1
+		i := j*64
+		IF imm8[(4+j)%8]
+			temp[i+63:i] := a[i+63:i] * b[i+63:i]
+		ELSE
+			temp[i+63:i] := 0.0
+		FI
+	ENDFOR
+	
+	sum[63:0] := temp[127:64] + temp[63:0]
+	
+	FOR j := 0 to 1
+		i := j*64
+		IF imm8[j%8]
+			tmpdst[i+63:i] := sum[63:0]
+		ELSE
+			tmpdst[i+63:i] := 0.0
+		FI
+	ENDFOR
+	RETURN tmpdst[127:0]
+}
+dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0])
+	</operation>
+	<instruction name="DPPD" form="xmm, xmm, imm8" xed="DPPD_XMMdq_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_dp_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8".</description>
+	<operation>
+DEFINE DP(a[127:0], b[127:0], imm8[7:0]) {
+	FOR j := 0 to 3
+		i := j*32
+		IF imm8[(4+j)%8]
+			temp[i+31:i] := a[i+31:i] * b[i+31:i]
+		ELSE
+			temp[i+31:i] := 0
+		FI
+	ENDFOR
+	
+	sum[31:0] := (temp[127:96] + temp[95:64]) + (temp[63:32] + temp[31:0])
+	
+	FOR j := 0 to 3
+		i := j*32
+		IF imm8[j%8]
+			tmpdst[i+31:i] := sum[31:0]
+		ELSE
+			tmpdst[i+31:i] := 0
+		FI
+	ENDFOR
+	RETURN tmpdst[127:0]
+}
+dst[127:0] := DP(a[127:0], b[127:0], imm8[7:0])
+	</operation>
+	<instruction name="DPPS" form="xmm, xmm, imm8" xed="DPPS_XMMdq_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_extract_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract a single-precision (32-bit) floating-point element from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+dst[31:0] := (a[127:0] &gt;&gt; (imm8[1:0] * 32))[31:0]
+	</operation>
+	<instruction name="EXTRACTPS" form="r32, xmm, imm8" xed="EXTRACTPS_GPR32d_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_extract_epi8">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Extract an 8-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst".</description>
+	<operation>
+dst[7:0] := (a[127:0] &gt;&gt; (imm8[3:0] * 8))[7:0]
+dst[31:8] := 0
+	</operation>
+	<instruction name="PEXTRB" form="r32, xmm, imm8" xed="PEXTRB_GPR32d_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_extract_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Extract a 32-bit integer from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+dst[31:0] := (a[127:0] &gt;&gt; (imm8[1:0] * 32))[31:0]
+	</operation>
+	<instruction name="PEXTRD" form="r32, xmm, imm8" xed="PEXTRD_GPR32d_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_extract_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Extract a 64-bit integer from "a", selected with "imm8", and store the result in "dst".</description>
+	<operation>
+dst[63:0] := (a[127:0] &gt;&gt; (imm8[0] * 64))[63:0]
+	</operation>
+	<instruction name="PEXTRQ" form="r64, xmm, imm8" xed="PEXTRQ_GPR64q_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_insert_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Copy "a" to "tmp", then insert a single-precision (32-bit) floating-point element from "b" into "tmp" using the control in "imm8". Store "tmp" to "dst" using the mask in "imm8" (elements are zeroed out when the corresponding bit is set).</description>
+	<operation>
+tmp2[127:0] := a[127:0]
+CASE (imm8[7:6]) OF
+0: tmp1[31:0] := b[31:0]
+1: tmp1[31:0] := b[63:32]
+2: tmp1[31:0] := b[95:64]
+3: tmp1[31:0] := b[127:96]
+ESAC
+CASE (imm8[5:4]) OF
+0: tmp2[31:0] := tmp1[31:0]
+1: tmp2[63:32] := tmp1[31:0]
+2: tmp2[95:64] := tmp1[31:0]
+3: tmp2[127:96] := tmp1[31:0]
+ESAC
+FOR j := 0 to 3
+	i := j*32
+	IF imm8[j%8]
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := tmp2[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="INSERTPS" form="xmm, xmm, imm8" xed="INSERTPS_XMMps_XMMps_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_insert_epi8">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="int" varname="i" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Copy "a" to "dst", and insert the lower 8-bit integer from "i" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[127:0] := a[127:0]
+sel := imm8[3:0]*8
+dst[sel+7:sel] := i[7:0]
+	</operation>
+	<instruction name="PINSRB" form="xmm, r32, imm8" xed="PINSRB_XMMdq_GPR32d_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_insert_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="int" varname="i" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="2"/>
+	<description>Copy "a" to "dst", and insert the 32-bit integer "i" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[127:0] := a[127:0]
+sel := imm8[1:0]*32
+dst[sel+31:sel] := i[31:0]
+	</operation>
+	<instruction name="PINSRD" form="xmm, r32, imm8" xed="PINSRD_XMMdq_GPR32d_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_insert_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__int64" varname="i" etype="UI64"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="1"/>
+	<description>Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "imm8".</description>
+	<operation>
+dst[127:0] := a[127:0]
+sel := imm8[0]*64
+dst[sel+63:sel] := i[63:0]
+	</operation>
+	<instruction name="PINSRQ" form="xmm, r64, imm8" xed="PINSRQ_XMMdq_GPR64q_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_max_epi8">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := MAX(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXSB" form="xmm, xmm" xed="PMAXSB_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_max_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXSD" form="xmm, xmm" xed="PMAXSD_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_max_epu32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := MAX(a[i+31:i], b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXUD" form="xmm, xmm" xed="PMAXUD_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_max_epu16">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed maximum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := MAX(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMAXUW" form="xmm, xmm" xed="PMAXUW_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_min_epi8">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Compare packed signed 8-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := MIN(a[i+7:i], b[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINSB" form="xmm, xmm" xed="PMINSB_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_min_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Compare packed signed 32-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINSD" form="xmm, xmm" xed="PMINSD_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_min_epu32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Compare packed unsigned 32-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := MIN(a[i+31:i], b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINUD" form="xmm, xmm" xed="PMINUD_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_min_epu16">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<parameter type="__m128i" varname="b" etype="UI16"/>
+	<description>Compare packed unsigned 16-bit integers in "a" and "b", and store packed minimum values in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := MIN(a[i+15:i], b[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PMINUW" form="xmm, xmm" xed="PMINUW_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_packus_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Convert packed signed 32-bit integers from "a" and "b" to packed 16-bit integers using unsigned saturation, and store the results in "dst".</description>
+	<operation>
+dst[15:0] := SaturateU16(a[31:0])
+dst[31:16] := SaturateU16(a[63:32])
+dst[47:32] := SaturateU16(a[95:64])
+dst[63:48] := SaturateU16(a[127:96])
+dst[79:64] := SaturateU16(b[31:0])
+dst[95:80] := SaturateU16(b[63:32])
+dst[111:96] := SaturateU16(b[95:64])
+dst[127:112] := SaturateU16(b[127:96])
+	</operation>
+	<instruction name="PACKUSDW" form="xmm, xmm" xed="PACKUSDW_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cmpeq_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI64"/>
+	<parameter type="__m128i" varname="b" etype="UI64"/>
+	<description>Compare packed 64-bit integers in "a" and "b" for equality, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ( a[i+63:i] == b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPEQQ" form="xmm, xmm" xed="PCMPEQQ_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepi8_epi16">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	l := j*16
+	dst[l+15:l] := SignExtend16(a[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMOVSXBW" form="xmm, xmm" xed="PMOVSXBW_XMMdq_XMMq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepi8_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 8*j
+	dst[i+31:i] := SignExtend32(a[k+7:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVSXBD" form="xmm, xmm" xed="PMOVSXBD_XMMdq_XMMd"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepi8_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Sign extend packed 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := SignExtend64(a[k+7:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVSXBQ" form="xmm, xmm" xed="PMOVSXBQ_XMMdq_XMMw"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepi16_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 16*j
+	dst[i+31:i] := SignExtend32(a[k+15:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVSXWD" form="xmm, xmm" xed="PMOVSXWD_XMMdq_XMMq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepi16_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Sign extend packed 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 16*j
+	dst[i+63:i] := SignExtend64(a[k+15:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVSXWQ" form="xmm, xmm" xed="PMOVSXWQ_XMMdq_XMMd"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepi32_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Sign extend packed 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := SignExtend64(a[k+31:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVSXDQ" form="xmm, xmm" xed="PMOVSXDQ_XMMdq_XMMq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepu8_epi16">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 16-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	l := j*16
+	dst[l+15:l] := ZeroExtend16(a[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PMOVZXBW" form="xmm, xmm" xed="PMOVZXBW_XMMdq_XMMq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepu8_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 8*j
+	dst[i+31:i] := ZeroExtend32(a[k+7:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVZXBD" form="xmm, xmm" xed="PMOVZXBD_XMMdq_XMMd"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepu8_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<description>Zero extend packed unsigned 8-bit integers in the low 2 bytes of "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 8*j
+	dst[i+63:i] := ZeroExtend64(a[k+7:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVZXBQ" form="xmm, xmm" xed="PMOVZXBQ_XMMdq_XMMw"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepu16_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 32-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := 32*j
+	k := 16*j
+	dst[i+31:i] := ZeroExtend32(a[k+15:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVZXWD" form="xmm, xmm" xed="PMOVZXWD_XMMdq_XMMq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepu16_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Zero extend packed unsigned 16-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 16*j
+	dst[i+63:i] := ZeroExtend64(a[k+15:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVZXWQ" form="xmm, xmm" xed="PMOVZXWQ_XMMdq_XMMd"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_cvtepu32_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Convert</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<description>Zero extend packed unsigned 32-bit integers in "a" to packed 64-bit integers, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := 64*j
+	k := 32*j
+	dst[i+63:i] := ZeroExtend64(a[k+31:k])
+ENDFOR
+	</operation>
+	<instruction name="PMOVZXDQ" form="xmm, xmm" xed="PMOVZXDQ_XMMdq_XMMq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_mul_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI64"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := SignExtend64(a[i+31:i]) * SignExtend64(b[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PMULDQ" form="xmm, xmm" xed="PMULDQ_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_mullo_epi32">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="UI32"/>
+	<description>Multiply the packed 32-bit integers in "a" and "b", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	tmp[63:0] := a[i+31:i] * b[i+31:i]
+	dst[i+31:i] := tmp[31:0]
+ENDFOR
+	</operation>
+	<instruction name="PMULLD" form="xmm, xmm" xed="PMULLD_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_testz_si128">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value.</description>
+	<operation>
+IF ((a[127:0] AND b[127:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[127:0]) AND b[127:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+RETURN ZF
+	</operation>
+	<instruction name="PTEST" form="xmm, xmm" xed="PTEST_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_testc_si128">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="k" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value.</description>
+	<operation>
+IF ((a[127:0] AND b[127:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[127:0]) AND b[127:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+RETURN CF
+	</operation>
+	<instruction name="PTEST" form="xmm, xmm" xed="PTEST_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_testnzc_si128">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<description>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+IF ((a[127:0] AND b[127:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[127:0]) AND b[127:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="PTEST" form="xmm, xmm" xed="PTEST_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_test_all_zeros">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="mask" etype="M128"/>
+	<description>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and return 1 if the result is zero, otherwise return 0.</description>
+	<operation>
+IF ((a[127:0] AND mask[127:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+dst := ZF
+	</operation>
+	<instruction name="PTEST" form="xmm, xmm" xed="PTEST_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_test_mix_ones_zeros">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="mask" etype="M128"/>
+	<description>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "mask", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</description>
+	<operation>
+IF ((a[127:0] AND mask[127:0]) == 0)
+	ZF := 1
+ELSE
+	ZF := 0
+FI
+IF (((NOT a[127:0]) AND mask[127:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+IF (ZF == 0 &amp;&amp; CF == 0)
+	dst := 1
+ELSE
+	dst := 0
+FI
+	</operation>
+	<instruction name="PTEST" form="xmm, xmm" xed="PTEST_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" sequence="TRUE" name="_mm_test_all_ones">
+	<type>Integer</type>
+	<type>Flag</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Logical</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<description>Compute the bitwise NOT of "a" and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0.</description>
+	<operation>
+FOR j := 0 to 127
+	tmp[j] := 1
+ENDFOR
+IF (((NOT a[127:0]) AND tmp[127:0]) == 0)
+	CF := 1
+ELSE
+	CF := 0
+FI
+dst := CF
+	</operation>
+	<instruction name="PCMPEQD" form="xmm, xmm" xed="PCMPEQD_XMMdq_XMMdq"/>
+	<instruction name="PTEST" form="xmm, xmm" xed="PTEST_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_round_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" immtype="_MM_FROUND"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed double-precision floating-point elements in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ROUND(a[i+63:i], rounding)
+ENDFOR
+	</operation>
+	<instruction name="ROUNDPD" form="xmm, xmm, imm8" xed="ROUNDPD_XMMpd_XMMpd_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_floor_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := FLOOR(a[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="ROUNDPD" form="xmm, xmm, imm8" xed="ROUNDPD_XMMpd_XMMpd_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_ceil_pd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<description>Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := CEIL(a[i+63:i])
+ENDFOR
+	</operation>
+	<instruction name="ROUNDPD" form="xmm, xmm, imm8" xed="ROUNDPD_XMMpd_XMMpd_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_round_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" immtype="_MM_FROUND"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst".
+	[round_note]</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ROUND(a[i+31:i], rounding)
+ENDFOR
+	</operation>
+	<instruction name="ROUNDPS" form="xmm, xmm, imm8" xed="ROUNDPS_XMMps_XMMps_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_floor_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := FLOOR(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="ROUNDPS" form="xmm, xmm, imm8" xed="ROUNDPS_XMMps_XMMps_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_ceil_ps">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<description>Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := CEIL(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="ROUNDPS" form="xmm, xmm, imm8" xed="ROUNDPS_XMMps_XMMps_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_round_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<parameter type="int" varname="rounding" etype="IMM" immtype="_MM_FROUND"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" using the "rounding" parameter, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+	[round_note]</description>
+	<operation>
+dst[63:0] := ROUND(b[63:0], rounding)
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="ROUNDSD" form="xmm, xmm, imm8" xed="ROUNDSD_XMMq_XMMq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_floor_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" down to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := FLOOR(b[63:0])
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="ROUNDSD" form="xmm, xmm, imm8" xed="ROUNDSD_XMMq_XMMq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_ceil_sd">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128d" varname="dst" etype="FP64"/>
+	<parameter type="__m128d" varname="a" etype="FP64"/>
+	<parameter type="__m128d" varname="b" etype="FP64"/>
+	<description>Round the lower double-precision (64-bit) floating-point element in "b" up to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".</description>
+	<operation>
+dst[63:0] := CEIL(b[63:0])
+dst[127:64] := a[127:64]
+	</operation>
+	<instruction name="ROUNDSD" form="xmm, xmm, imm8" xed="ROUNDSD_XMMq_XMMq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_round_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<parameter type="int" varname="rounding" etype="IMM" immtype="_MM_FROUND"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" using the "rounding" parameter, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
+	[round_note]</description>
+	<operation>
+dst[31:0] := ROUND(b[31:0], rounding)
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="ROUNDSS" form="xmm, xmm, imm8" xed="ROUNDSS_XMMd_XMMd_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_floor_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" down to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := FLOOR(b[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="ROUNDSS" form="xmm, xmm, imm8" xed="ROUNDSS_XMMd_XMMd_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_ceil_ss">
+	<type>Floating Point</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128" varname="dst" etype="FP32"/>
+	<parameter type="__m128" varname="a" etype="FP32"/>
+	<parameter type="__m128" varname="b" etype="FP32"/>
+	<description>Round the lower single-precision (32-bit) floating-point element in "b" up to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".</description>
+	<operation>
+dst[31:0] := CEIL(b[31:0])
+dst[127:32] := a[127:32]
+	</operation>
+	<instruction name="ROUNDSS" form="xmm, xmm, imm8" xed="ROUNDSS_XMMd_XMMd_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_minpos_epu16">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="UI16"/>
+	<description>Horizontally compute the minimum amongst the packed unsigned 16-bit integers in "a", store the minimum and index in "dst", and zero the remaining bits in "dst".</description>
+	<operation>
+index[2:0] := 0
+min[15:0] := a[15:0]
+FOR j := 0 to 7
+	i := j*16
+	IF a[i+15:i] &lt; min[15:0]
+		index[2:0] := j
+		min[15:0] := a[i+15:i]
+	FI
+ENDFOR
+dst[15:0] := min[15:0]
+dst[18:16] := index[2:0]
+dst[127:19] := 0
+	</operation>
+	<instruction name="PHMINPOSUW" form="xmm, xmm" xed="PHMINPOSUW_XMMdq_XMMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_mpsadbw_epu8">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Arithmetic</category>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".
+	Eight SADs are performed using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8".</description>
+	<operation>
+DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
+	a_offset := imm8[2]*32
+	b_offset := imm8[1:0]*32
+	FOR j := 0 to 7
+		i := j*8
+		k := a_offset+i
+		l := b_offset
+		tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \
+		                   ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24]))
+	ENDFOR
+	RETURN tmp[127:0]
+}
+dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])
+	</operation>
+	<instruction name="MPSADBW" form="xmm, xmm, imm8" xed="MPSADBW_XMMdq_XMMdq_IMMb"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.1" vexEq="TRUE" name="_mm_stream_load_si128">
+	<type>Integer</type>
+	<CPUID>SSE4.1</CPUID>
+	<category>Load</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i *" varname="mem_addr" etype="M128" memwidth="128"/>
+	<description>Load 128-bits of integer data from memory into "dst" using a non-temporal memory hint.
+	"mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.</description>
+	<operation>
+dst[127:0] := MEM[mem_addr+127:mem_addr]
+	</operation>
+	<instruction name="MOVNTDQA" form="xmm, m128" xed="MOVNTDQA_XMMdq_MEMdq"/>
+	<header>smmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistrm">
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated mask in "dst".
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF a[m+size-1:m] == 0
+			aInvalid := 1
+		FI
+		IF b[n+size-1:n] == 0
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+bInvalid := 0
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF b[i*size+size-1:i*size] == 0
+				bInvalid := 1
+			FI
+			IF bInvalid // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+IF imm8[6] // byte / word mask
+	FOR i := 0 to UpperBound
+		j := i*size
+		IF IntRes2[i]
+			dst[j+size-1:j] := (imm8[0] ? 0xFFFF : 0xFF)
+		ELSE
+			dst[j+size-1:j] := 0
+		FI
+	ENDFOR
+ELSE // bit mask
+	dst[UpperBound:0] := IntRes2[UpperBound:0]
+	dst[127:UpperBound+1] := 0
+FI
+	</operation>
+	<instruction name="PCMPISTRM" form="xmm, xmm, imm8" xed="PCMPISTRM_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistri">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and store the generated index in "dst".
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF a[m+size-1:m] == 0
+			aInvalid := 1
+		FI
+		IF b[n+size-1:n] == 0
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+bInvalid := 0
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF b[i*size+size-1:i*size] == 0
+				bInvalid := 1
+			FI
+			IF bInvalid // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+IF imm8[6] // most significant bit
+	tmp := UpperBound
+	dst := tmp
+	DO WHILE ((tmp &gt;= 0) AND IntRes2[tmp] == 0)
+		tmp := tmp - 1
+		dst := tmp
+	OD
+ELSE // least significant bit
+	tmp := 0
+	dst := tmp
+	DO WHILE ((tmp &lt;= UpperBound) AND IntRes2[tmp] == 0)
+		tmp := tmp + 1
+		dst := tmp
+	OD
+FI
+	</operation>
+	<instruction name="PCMPISTRI" form="xmm, xmm, imm8" xed="PCMPISTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistrz">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128" hint="TRUE"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+bInvalid := 0
+FOR j := 0 to UpperBound
+	n := j*size
+	IF b[n+size-1:n] == 0
+		bInvalid := 1
+	FI
+ENDFOR
+dst := bInvalid
+	</operation>
+	<instruction name="PCMPISTRI" form="xmm, xmm, imm8" xed="PCMPISTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistrc">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF a[m+size-1:m] == 0
+			aInvalid := 1
+		FI
+		IF b[n+size-1:n] == 0
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+bInvalid := 0
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF b[i*size+size-1:i*size] == 0
+				bInvalid := 1
+			FI
+			IF bInvalid // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+dst := (IntRes2 != 0)
+	</operation>
+	<instruction name="PCMPISTRI" form="xmm, xmm, imm8" xed="PCMPISTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistrs">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128" hint="TRUE"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+aInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	IF a[m+size-1:m] == 0
+		aInvalid := 1
+	FI
+ENDFOR
+dst := aInvalid
+	</operation>
+	<instruction name="PCMPISTRI" form="xmm, xmm, imm8" xed="PCMPISTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistro">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns bit 0 of the resulting bit mask.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF a[m+size-1:m] == 0
+			aInvalid := 1
+		FI
+		IF b[n+size-1:n] == 0
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+bInvalid := 0
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF b[i*size+size-1:i*size] == 0
+				bInvalid := 1
+			FI
+			IF bInvalid // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+dst := IntRes2[0]
+	</operation>
+	<instruction name="PCMPISTRI" form="xmm, xmm, imm8" xed="PCMPISTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpistra">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings with implicit lengths in "a" and "b" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF a[m+size-1:m] == 0
+			aInvalid := 1
+		FI
+		IF b[n+size-1:n] == 0
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+bInvalid := 0
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF b[i*size+size-1:i*size] == 0
+				bInvalid := 1
+			FI
+			IF bInvalid // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+dst := (IntRes2 == 0) AND !bInvalid
+	</operation>
+	<instruction name="PCMPISTRI" form="xmm, xmm, imm8" xed="PCMPISTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestrm">
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="__m128i" varname="dst" etype="M128"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="la" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="int" varname="lb" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated mask in "dst".
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF i == la
+			aInvalid := 1
+		FI
+		IF j == lb
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF i &gt;= lb // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+IF imm8[6] // byte / word mask
+	FOR i := 0 to UpperBound
+		j := i*size
+		IF IntRes2[i]
+			dst[j+size-1:j] := (imm8[0] ? 0xFF : 0xFFFF)
+		ELSE
+			dst[j+size-1:j] := 0
+		FI
+	ENDFOR
+ELSE // bit mask
+	dst[UpperBound:0] := IntRes2[UpperBound:0]
+	dst[127:UpperBound+1] := 0
+FI
+	</operation>
+	<instruction name="PCMPESTRM" form="xmm, xmm, imm8" xed="PCMPESTRM_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestri">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="la" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="int" varname="lb" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and store the generated index in "dst".
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF i == la
+			aInvalid := 1
+		FI
+		IF j == lb
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF i &gt;= lb // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+IF imm8[6] // most significant bit
+	tmp := UpperBound
+	dst := tmp
+	DO WHILE ((tmp &gt;= 0) AND IntRes2[tmp] == 0)
+		tmp := tmp - 1
+		dst := tmp
+	OD
+ELSE // least significant bit
+	tmp := 0
+	dst := tmp
+	DO WHILE ((tmp &lt;= UpperBound) AND IntRes2[tmp] == 0)
+		tmp := tmp + 1
+		dst := tmp
+	OD
+FI
+	</operation>
+	<instruction name="PCMPESTRI" form="xmm, xmm, imm8" xed="PCMPESTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestrz">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128" hint="TRUE"/>
+	<parameter type="int" varname="la" etype="UI32" hint="TRUE"/>
+	<parameter type="__m128i" varname="b" etype="M128" hint="TRUE"/>
+	<parameter type="int" varname="lb" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "b" was null, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+dst := (lb &lt;= UpperBound)
+	</operation>
+	<instruction name="PCMPESTRI" form="xmm, xmm, imm8" xed="PCMPESTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestrc">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="la" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="int" varname="lb" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if the resulting mask was non-zero, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF i == la
+			aInvalid := 1
+		FI
+		IF j == lb
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF i &gt;= lb // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+dst := (IntRes2 != 0)
+	</operation>
+	<instruction name="PCMPESTRI" form="xmm, xmm, imm8" xed="PCMPESTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestrs">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128" hint="TRUE"/>
+	<parameter type="int" varname="la" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="M128" hint="TRUE"/>
+	<parameter type="int" varname="lb" etype="UI32" hint="TRUE"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if any character in "a" was null, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+dst := (la &lt;= UpperBound)
+	</operation>
+	<instruction name="PCMPESTRI" form="xmm, xmm, imm8" xed="PCMPESTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestro">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="la" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="int" varname="lb" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns bit 0 of the resulting bit mask.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF i == la
+			aInvalid := 1
+		FI
+		IF j == lb
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF i &gt;= lb // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+dst := IntRes2[0]
+	</operation>
+	<instruction name="PCMPESTRI" form="xmm, xmm, imm8" xed="PCMPESTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpestra">
+	<type>Flag</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>String Compare</category>
+	<return type="int" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="M128"/>
+	<parameter type="int" varname="la" etype="UI32"/>
+	<parameter type="__m128i" varname="b" etype="M128"/>
+	<parameter type="int" varname="lb" etype="UI32"/>
+	<parameter type="const int" varname="imm8" etype="IMM" immwidth="8"/>
+	<description>Compare packed strings in "a" and "b" with lengths "la" and "lb" using the control in "imm8", and returns 1 if "b" did not contain a null character and the resulting mask was zero, and 0 otherwise.
+	[strcmp_note]</description>
+	<operation>
+size := (imm8[0] ? 16 : 8) // 8 or 16-bit characters
+UpperBound := (128 / size) - 1
+BoolRes := 0
+// compare all characters
+aInvalid := 0
+bInvalid := 0
+FOR i := 0 to UpperBound
+	m := i*size
+	FOR j := 0 to UpperBound
+		n := j*size
+		BoolRes.word[i].bit[j] := (a[m+size-1:m] == b[n+size-1:n]) ? 1 : 0
+		
+		// invalidate characters after EOS
+		IF i == la
+			aInvalid := 1
+		FI
+		IF j == lb
+			bInvalid := 1
+		FI
+		
+		// override comparisons for invalid characters
+		CASE (imm8[3:2]) OF
+		0:  // equal any
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		1:  // ranges
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			FI
+		2:  // equal each
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		3:  // equal ordered
+			IF (!aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 0
+			ELSE IF (aInvalid &amp;&amp; !bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			ELSE IF (aInvalid &amp;&amp; bInvalid)
+				BoolRes.word[i].bit[j] := 1
+			FI
+		ESAC
+	ENDFOR
+ENDFOR
+// aggregate results
+CASE (imm8[3:2]) OF
+0:  // equal any
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR BoolRes.word[i].bit[j]
+		ENDFOR
+	ENDFOR
+1:  // ranges
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		FOR j := 0 to UpperBound
+			IntRes1[i] := IntRes1[i] OR (BoolRes.word[i].bit[j] AND BoolRes.word[i].bit[j+1])
+			j += 2
+		ENDFOR
+	ENDFOR
+2:  // equal each
+	IntRes1 := 0
+	FOR i := 0 to UpperBound
+		IntRes1[i] := BoolRes.word[i].bit[i]
+	ENDFOR
+3:  // equal ordered
+	IntRes1 := (imm8[0] ? 0xFF : 0xFFFF)
+	FOR i := 0 to UpperBound
+		k := i
+		FOR j := 0 to UpperBound-i
+			IntRes1[i] := IntRes1[i] AND BoolRes.word[k].bit[j]
+			k := k+1
+		ENDFOR
+	ENDFOR
+ESAC
+// optionally negate results
+FOR i := 0 to UpperBound
+	IF imm8[4]
+		IF imm8[5] // only negate valid
+			IF i &gt;= lb // invalid, don't negate
+				IntRes2[i] := IntRes1[i]
+			ELSE // valid, negate
+				IntRes2[i] := -1 XOR IntRes1[i]
+			FI
+		ELSE // negate all
+			IntRes2[i] := -1 XOR IntRes1[i]
+		FI
+	ELSE // don't negate
+		IntRes2[i] := IntRes1[i]
+	FI
+ENDFOR
+// output
+dst := (IntRes2 == 0) AND (lb &gt; UpperBound)
+	</operation>
+	<instruction name="PCMPESTRI" form="xmm, xmm, imm8" xed="PCMPESTRI_XMMdq_XMMdq_IMMb"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" vexEq="TRUE" name="_mm_cmpgt_epi64">
+	<type>Integer</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>Compare</category>
+	<return type="__m128i" varname="dst" etype="UI64"/>
+	<parameter type="__m128i" varname="a" etype="SI64"/>
+	<parameter type="__m128i" varname="b" etype="SI64"/>
+	<description>Compare packed signed 64-bit integers in "a" and "b" for greater-than, and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*64
+	dst[i+63:i] := ( a[i+63:i] &gt; b[i+63:i] ) ? 0xFFFFFFFFFFFFFFFF : 0
+ENDFOR
+	</operation>
+	<instruction name="PCMPGTQ" form="xmm, xmm" xed="PCMPGTQ_XMMdq_XMMdq"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" name="_mm_crc32_u8">
+	<type>Integer</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>Cryptography</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="crc" etype="UI32"/>
+	<parameter type="unsigned char" varname="v" etype="UI8"/>
+	<description>Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 8-bit integer "v", and stores the result in "dst".</description>
+	<operation>tmp1[7:0] := v[0:7] // bit reflection
+tmp2[31:0] := crc[0:31] // bit reflection
+tmp3[39:0] := tmp1[7:0] &lt;&lt; 32 
+tmp4[39:0] := tmp2[31:0] &lt;&lt; 8
+tmp5[39:0] := tmp3[39:0] XOR tmp4[39:0]
+tmp6[31:0] := MOD2(tmp5[39:0], 0x11EDC6F41) // remainder from polynomial division modulus 2
+dst[31:0] := tmp6[0:31] // bit reflection
+	</operation>
+	<instruction name="CRC32" form="r32, r8" xed="CRC32_GPRyy_GPR8b"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" name="_mm_crc32_u16">
+	<type>Integer</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>Cryptography</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="crc" etype="UI32"/>
+	<parameter type="unsigned short" varname="v" etype="UI16"/>
+	<description>Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 16-bit integer "v", and stores the result in "dst".</description>
+	<operation>tmp1[15:0] := v[0:15] // bit reflection
+tmp2[31:0] := crc[0:31] // bit reflection
+tmp3[47:0] := tmp1[15:0] &lt;&lt; 32
+tmp4[47:0] := tmp2[31:0] &lt;&lt; 16
+tmp5[47:0] := tmp3[47:0] XOR tmp4[47:0]
+tmp6[31:0] := MOD2(tmp5[47:0], 0x11EDC6F41) // remainder from polynomial division modulus 2
+dst[31:0] := tmp6[0:31] // bit reflection
+	</operation>
+	<instruction name="CRC32" form="r32, r16" xed="CRC32_GPRyy_GPRv"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" name="_mm_crc32_u32">
+	<type>Integer</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>Cryptography</category>
+	<return type="unsigned int" varname="dst" etype="UI32"/>
+	<parameter type="unsigned int" varname="crc" etype="UI32"/>
+	<parameter type="unsigned int" varname="v" etype="UI32"/>
+	<description>Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 32-bit integer "v", and stores the result in "dst".</description>
+	<operation>tmp1[31:0] := v[0:31] // bit reflection
+tmp2[31:0] := crc[0:31] // bit reflection
+tmp3[63:0] := tmp1[31:0] &lt;&lt; 32
+tmp4[63:0] := tmp2[31:0] &lt;&lt; 32
+tmp5[63:0] := tmp3[63:0] XOR tmp4[63:0]
+tmp6[31:0] := MOD2(tmp5[63:0], 0x11EDC6F41) // remainder from polynomial division modulus 2
+dst[31:0] := tmp6[0:31] // bit reflection
+	</operation>
+	<instruction name="CRC32" form="r32, r32" xed="CRC32_GPRyy_GPRv"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSE4.2" name="_mm_crc32_u64">
+	<type>Integer</type>
+	<CPUID>SSE4.2</CPUID>
+	<category>Cryptography</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="crc" etype="UI64"/>
+	<parameter type="unsigned __int64" varname="v" etype="UI64"/>
+	<description>Starting with the initial value in "crc", accumulates a CRC32 value for unsigned 64-bit integer "v", and stores the result in "dst".</description>
+	<operation>tmp1[63:0] := v[0:63] // bit reflection
+tmp2[31:0] := crc[0:31] // bit reflection
+tmp3[95:0] := tmp1[63:0] &lt;&lt; 32
+tmp4[95:0] := tmp2[31:0] &lt;&lt; 64
+tmp5[95:0] := tmp3[95:0] XOR tmp4[95:0]
+tmp6[31:0] := MOD2(tmp5[95:0], 0x11EDC6F41) // remainder from polynomial division modulus 2
+dst[31:0] := tmp6[0:31] // bit reflection
+	</operation>
+	<instruction name="CRC32" form="r64, r64" xed="CRC32_GPRyy_GPRv"/>
+	<header>nmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_abs_pi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	dst[i+7:i] := ABS(Int(a[i+7:i]))
+ENDFOR
+	</operation>
+	<instruction name="PABSB" form="mm, mm" xed="PABSB_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_abs_epi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<description>Compute the absolute value of packed signed 8-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	dst[i+7:i] := ABS(a[i+7:i])
+ENDFOR
+	</operation>
+	<instruction name="PABSB" form="xmm, xmm" xed="PABSB_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_abs_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := ABS(Int(a[i+15:i]))
+ENDFOR
+	</operation>
+	<instruction name="PABSW" form="mm, mm" xed="PABSW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_abs_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<description>Compute the absolute value of packed signed 16-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := ABS(a[i+15:i])
+ENDFOR
+	</operation>
+	<instruction name="PABSW" form="xmm, xmm" xed="PABSW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_abs_pi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m64" varname="dst" etype="UI32"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	dst[i+31:i] := ABS(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PABSD" form="mm, mm" xed="PABSD_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_abs_epi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Special Math Functions</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<description>Compute the absolute value of packed signed 32-bit integers in "a", and store the unsigned results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	dst[i+31:i] := ABS(a[i+31:i])
+ENDFOR
+	</operation>
+	<instruction name="PABSD" form="xmm, xmm" xed="PABSD_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_shuffle_epi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Swizzle</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF b[i+7] == 1
+		dst[i+7:i] := 0
+	ELSE
+		index[3:0] := b[i+3:i]
+		dst[i+7:i] := a[index*8+7:index*8]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSHUFB" form="xmm, xmm" xed="PSHUFB_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_shuffle_pi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Swizzle</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<description>Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	IF b[i+7] == 1
+		dst[i+7:i] := 0
+	ELSE
+		index[2:0] := b[i+2:i]
+		dst[i+7:i] := a[index*8+7:index*8]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSHUFB" form="mm, mm" xed="PSHUFB_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_alignr_epi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="5"/>
+	<description>Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "imm8" bytes, and store the low 16 bytes in "dst".</description>
+	<operation>
+tmp[255:0] := ((a[127:0] &lt;&lt; 128)[255:0] OR b[127:0]) &gt;&gt; (imm8*8)
+dst[127:0] := tmp[127:0]
+	</operation>
+	<instruction name="PALIGNR" form="xmm, xmm, imm8" xed="PALIGNR_XMMdq_XMMdq_IMMb"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_alignr_pi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Miscellaneous</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="UI8"/>
+	<parameter type="int" varname="imm8" etype="IMM" immwidth="4"/>
+	<description>Concatenate 8-byte blocks in "a" and "b" into a 16-byte temporary result, shift the result right by "imm8" bytes, and store the low 8 bytes in "dst".</description>
+	<operation>
+tmp[127:0] := ((a[63:0] &lt;&lt; 64)[127:0] OR b[63:0]) &gt;&gt; (imm8*8)
+dst[63:0] := tmp[63:0]
+	</operation>
+	<instruction name="PALIGNR" form="mm, mm, imm8" xed="PALIGNR_MMXq_MMXq_IMMb"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_hadd_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := a[31:16] + a[15:0]
+dst[31:16] := a[63:48] + a[47:32]
+dst[47:32] := a[95:80] + a[79:64]
+dst[63:48] := a[127:112] + a[111:96]
+dst[79:64] := b[31:16] + b[15:0]
+dst[95:80] := b[63:48] + b[47:32]
+dst[111:96] := b[95:80] + b[79:64]
+dst[127:112] := b[127:112] + b[111:96]
+	</operation>
+	<instruction name="PHADDW" form="xmm, xmm" xed="PHADDW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_hadds_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:16] + a[15:0])
+dst[31:16] := Saturate16(a[63:48] + a[47:32])
+dst[47:32] := Saturate16(a[95:80] + a[79:64])
+dst[63:48] := Saturate16(a[127:112] + a[111:96])
+dst[79:64] := Saturate16(b[31:16] + b[15:0])
+dst[95:80] := Saturate16(b[63:48] + b[47:32])
+dst[111:96] := Saturate16(b[95:80] + b[79:64])
+dst[127:112] := Saturate16(b[127:112] + b[111:96])
+	</operation>
+	<instruction name="PHADDSW" form="xmm, xmm" xed="PHADDSW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_hadd_epi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] + a[31:0]
+dst[63:32] := a[127:96] + a[95:64]
+dst[95:64] := b[63:32] + b[31:0]
+dst[127:96] := b[127:96] + b[95:64]
+	</operation>
+	<instruction name="PHADDD" form="xmm, xmm" xed="PHADDD_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_hadd_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := a[31:16] + a[15:0]
+dst[31:16] := a[63:48] + a[47:32]
+dst[47:32] := b[31:16] + b[15:0]
+dst[63:48] := b[63:48] + b[47:32]
+	</operation>
+	<instruction name="PHADDW" form="mm, mm" xed="PHADDW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_hadd_pi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI32"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".</description>
+	<operation>
+dst[31:0] := a[63:32] + a[31:0]
+dst[63:32] := b[63:32] + b[31:0]
+	</operation>
+	<instruction name="PHADDD" form="mm, mm" xed="PHADDD_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_hadds_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Horizontally add adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[31:16] + a[15:0])
+dst[31:16] := Saturate16(a[63:48] + a[47:32])
+dst[47:32] := Saturate16(b[31:16] + b[15:0])
+dst[63:48] := Saturate16(b[63:48] + b[47:32])
+	</operation>
+	<instruction name="PHADDSW" form="mm, mm" xed="PHADDSW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_hsub_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := a[15:0] - a[31:16]
+dst[31:16] := a[47:32] - a[63:48]
+dst[47:32] := a[79:64] - a[95:80]
+dst[63:48] := a[111:96] - a[127:112]
+dst[79:64] := b[15:0] - b[31:16]
+dst[95:80] := b[47:32] - b[63:48]
+dst[111:96] := b[79:64] - b[95:80]
+dst[127:112] := b[111:96] - b[127:112]
+	</operation>
+	<instruction name="PHSUBW" form="xmm, xmm" xed="PHSUBW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_hsubs_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[15:0] - a[31:16])
+dst[31:16] := Saturate16(a[47:32] - a[63:48])
+dst[47:32] := Saturate16(a[79:64] - a[95:80])
+dst[63:48] := Saturate16(a[111:96] - a[127:112])
+dst[79:64] := Saturate16(b[15:0] - b[31:16])
+dst[95:80] := Saturate16(b[47:32] - b[63:48])
+dst[111:96] := Saturate16(b[79:64] - b[95:80])
+dst[127:112] := Saturate16(b[111:96] - b[127:112])
+	</operation>
+	<instruction name="PHSUBSW" form="xmm, xmm" xed="PHSUBSW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_hsub_epi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] - a[63:32]
+dst[63:32] := a[95:64] - a[127:96]
+dst[95:64] := b[31:0] - b[63:32]
+dst[127:96] := b[95:64] - b[127:96]
+	</operation>
+	<instruction name="PHSUBD" form="xmm, xmm" xed="PHSUBD_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_hsub_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := a[15:0] - a[31:16]
+dst[31:16] := a[47:32] - a[63:48]
+dst[47:32] := b[15:0] - b[31:16]
+dst[63:48] := b[47:32] - b[63:48]
+	</operation>
+	<instruction name="PHSUBW" form="mm, mm" xed="PHSUBW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_hsub_pi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI32"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".</description>
+	<operation>
+dst[31:0] := a[31:0] - a[63:32]
+dst[63:32] := b[31:0] - b[63:32]
+	</operation>
+	<instruction name="PHSUBD" form="mm, mm" xed="PHSUBD_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_hsubs_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Horizontally subtract adjacent pairs of signed 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".</description>
+	<operation>
+dst[15:0] := Saturate16(a[15:0] - a[31:16])
+dst[31:16] := Saturate16(a[47:32] - a[63:48])
+dst[47:32] := Saturate16(b[15:0] - b[31:16])
+dst[63:48] := Saturate16(b[47:32] - b[63:48])
+	</operation>
+	<instruction name="PHSUBSW" form="mm, mm" xed="PHSUBSW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_maddubs_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="SI16"/>
+	<parameter type="__m128i" varname="a" etype="UI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PMADDUBSW" form="xmm, xmm" xed="PMADDUBSW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_maddubs_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="SI16"/>
+	<parameter type="__m64" varname="a" etype="UI8"/>
+	<parameter type="__m64" varname="b" etype="SI8"/>
+	<description>Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	dst[i+15:i] := Saturate16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
+ENDFOR
+	</operation>
+	<instruction name="PMADDUBSW" form="mm, mm" xed="PMADDUBSW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_mulhrs_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+	dst[i+15:i] := tmp[16:1]
+ENDFOR
+	</operation>
+	<instruction name="PMULHRSW" form="xmm, xmm" xed="PMULHRSW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_mulhrs_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	tmp[31:0] := ((SignExtend32(a[i+15:i]) * SignExtend32(b[i+15:i])) &gt;&gt; 14) + 1
+	dst[i+15:i] := tmp[16:1]
+ENDFOR
+	</operation>
+	<instruction name="PMULHRSW" form="mm, mm" xed="PMULHRSW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_sign_epi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI8"/>
+	<parameter type="__m128i" varname="a" etype="SI8"/>
+	<parameter type="__m128i" varname="b" etype="SI8"/>
+	<description>Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 15
+	i := j*8
+	IF b[i+7:i] &lt; 0
+		dst[i+7:i] := -(a[i+7:i])
+	ELSE IF b[i+7:i] == 0
+		dst[i+7:i] := 0
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSIGNB" form="xmm, xmm" xed="PSIGNB_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_sign_epi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI16"/>
+	<parameter type="__m128i" varname="a" etype="SI16"/>
+	<parameter type="__m128i" varname="b" etype="SI16"/>
+	<description>Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*16
+	IF b[i+15:i] &lt; 0
+		dst[i+15:i] := -(a[i+15:i])
+	ELSE IF b[i+15:i] == 0
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSIGNW" form="xmm, xmm" xed="PSIGNW_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" vexEq="TRUE" name="_mm_sign_epi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m128i" varname="dst" etype="UI32"/>
+	<parameter type="__m128i" varname="a" etype="SI32"/>
+	<parameter type="__m128i" varname="b" etype="SI32"/>
+	<description>Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*32
+	IF b[i+31:i] &lt; 0
+		dst[i+31:i] := -(a[i+31:i])
+	ELSE IF b[i+31:i] == 0
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSIGND" form="xmm, xmm" xed="PSIGND_XMMdq_XMMdq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_sign_pi8">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI8"/>
+	<parameter type="__m64" varname="a" etype="SI8"/>
+	<parameter type="__m64" varname="b" etype="SI8"/>
+	<description>Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 7
+	i := j*8
+	IF b[i+7:i] &lt; 0
+		dst[i+7:i] := -(a[i+7:i])
+	ELSE IF b[i+7:i] == 0
+		dst[i+7:i] := 0
+	ELSE
+		dst[i+7:i] := a[i+7:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSIGNB" form="mm, mm" xed="PSIGNB_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_sign_pi16">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI16"/>
+	<parameter type="__m64" varname="a" etype="SI16"/>
+	<parameter type="__m64" varname="b" etype="SI16"/>
+	<description>Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 3
+	i := j*16
+	IF b[i+15:i] &lt; 0
+		dst[i+15:i] := -(a[i+15:i])
+	ELSE IF b[i+15:i] == 0
+		dst[i+15:i] := 0
+	ELSE
+		dst[i+15:i] := a[i+15:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSIGNW" form="mm, mm" xed="PSIGNW_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="SSSE3" name="_mm_sign_pi32">
+	<type>Integer</type>
+	<CPUID>SSSE3</CPUID>
+	<category>Arithmetic</category>
+	<return type="__m64" varname="dst" etype="UI32"/>
+	<parameter type="__m64" varname="a" etype="SI32"/>
+	<parameter type="__m64" varname="b" etype="SI32"/>
+	<description>Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Elements in "dst" are zeroed out when the corresponding element in "b" is zero.</description>
+	<operation>
+FOR j := 0 to 1
+	i := j*32
+	IF b[i+31:i] &lt; 0
+		dst[i+31:i] := -(a[i+31:i])
+	ELSE IF b[i+31:i] == 0
+		dst[i+31:i] := 0
+	ELSE
+		dst[i+31:i] := a[i+31:i]
+	FI
+ENDFOR
+	</operation>
+	<instruction name="PSIGND" form="mm, mm" xed="PSIGND_MMXq_MMXq"/>
+	<header>tmmintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_rdtsc">
+	<CPUID>TSC</CPUID>
+	<category>General Support</category>
+	<return type="__int64" varname="dst" etype="UI64"/>
+	<parameter type="void"/>
+	<description>Copy the current 64-bit value of the processor's time-stamp counter into "dst".</description>
+	<operation>dst[63:0] := TimeStampCounter
+	</operation>
+	<instruction name="RDTSC" xed="RDTSC"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsusldtrk">
+	<CPUID>TSXLDTRK</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<description>Mark the start of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a transactional region, subsequent loads are not added to the read set of the transaction. If this is used inside a suspend load address tracking region it will cause transaction abort. If this is used outside of a transactional region it behaves like a NOP.</description>
+	<instruction name="XSUSLDTRK" xed="XSUSLDTRK"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xresldtrk">
+	<CPUID>TSXLDTRK</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<description>Mark the end of a TSX (HLE/RTM) suspend load address tracking region. If this is used inside a suspend load address tracking region it will end the suspend region and all following load addresses will be added to the transaction read set. If this is used inside an active transaction but not in a suspend region it will cause transaction abort. If this is used outside of a transactional region it behaves like a NOP.</description>
+	<instruction name="XRESLDTRK" xed="XRESLDTRK"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_aesenclast_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="__m256i" varname="RoundKey" etype="M128"/>
+	<description>Perform the last round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*128
+	a[i+127:i] := ShiftRows(a[i+127:i])
+	a[i+127:i] := SubBytes(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VAESENCLAST" form="ymm, ymm" xed="VAESENCLAST_YMMu128_YMMu128_YMMu128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_aesenc_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="__m256i" varname="RoundKey" etype="M128"/>
+	<description>Perform one round of an AES encryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*128
+	a[i+127:i] := ShiftRows(a[i+127:i])
+	a[i+127:i] := SubBytes(a[i+127:i])
+	a[i+127:i] := MixColumns(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VAESENC" form="ymm, ymm" xed="VAESENC_YMMu128_YMMu128_YMMu128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_aesdeclast_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="__m256i" varname="RoundKey" etype="M128"/>
+	<description>Perform the last round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*128
+	a[i+127:i] := InvShiftRows(a[i+127:i])
+	a[i+127:i] := InvSubBytes(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VAESDECLAST" form="ymm, ymm" xed="VAESDECLAST_YMMu128_YMMu128_YMMu128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_aesdec_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>VAES</CPUID>
+	<category>Cryptography</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="a" etype="M128"/>
+	<parameter type="__m256i" varname="RoundKey" etype="M128"/>
+	<description>Perform one round of an AES decryption flow on data (state) in "a" using the round key in "RoundKey", and store the results in "dst".</description>
+	<operation>FOR j := 0 to 1
+	i := j*128
+	a[i+127:i] := InvShiftRows(a[i+127:i])
+	a[i+127:i] := InvSubBytes(a[i+127:i])
+	a[i+127:i] := InvMixColumns(a[i+127:i])
+	dst[i+127:i] := a[i+127:i] XOR RoundKey[i+127:i]
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VAESDEC" form="ymm, ymm" xed="VAESDEC_YMMu128_YMMu128_YMMu128"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm512_clmulepi64_epi128">
+	<type>Integer</type>
+	<CPUID>VPCLMULQDQ</CPUID>
+	<category>Application-Targeted</category>
+	<return type="__m512i" varname="dst" etype="M128"/>
+	<parameter type="__m512i" varname="b" etype="M128"/>
+	<parameter type="__m512i" varname="c" etype="M128"/>
+	<parameter type="const int" varname="Imm8" etype="IMM" immwidth="8"/>
+	<description>Carry-less multiplication of one quadword of
+		"b" by one quadword of "c", storing
+		the 128-bit result in "dst". The immediate "Imm8" is
+		used to determine which quadwords of "b"
+		and "c" should be used.</description>
+	<operation>
+DEFINE PCLMUL128(X,Y) {
+	FOR i := 0 to 63
+		TMP[i] := X[ 0 ] and Y[ i ]
+		FOR j := 1 to i
+			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
+		ENDFOR
+		DEST[ i ] := TMP[ i ]
+	ENDFOR
+	FOR i := 64 to 126
+		TMP[i] := 0
+		FOR j := i - 63 to 63
+			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
+		ENDFOR
+		DEST[ i ] := TMP[ i ]
+	ENDFOR
+	DEST[127] := 0
+	RETURN DEST // 128b vector
+}
+FOR i := 0 to 3
+	IF Imm8[0] == 0
+		TEMP1 := b.m128[i].qword[0]
+	ELSE
+		TEMP1 := b.m128[i].qword[1]
+	FI
+	IF Imm8[4] == 0
+		TEMP2 := c.m128[i].qword[0]
+	ELSE
+		TEMP2 := c.m128[i].qword[1]
+	FI
+	dst.m128[i] := PCLMUL128(TEMP1, TEMP2)
+ENDFOR
+dst[MAX:512] := 0
+	</operation>
+	<instruction name="VPCLMULQDQ" form="zmm, zmm, zmm, imm8" xed="VPCLMULQDQ_ZMMu128_ZMMu64_ZMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_mm256_clmulepi64_epi128">
+	<type>Integer</type>
+	<CPUID>AVX512VL</CPUID>
+	<CPUID>VPCLMULQDQ</CPUID>
+	<category>Application-Targeted</category>
+	<return type="__m256i" varname="dst" etype="M128"/>
+	<parameter type="__m256i" varname="b" etype="M128"/>
+	<parameter type="__m256i" varname="c" etype="M128"/>
+	<parameter type="const int" varname="Imm8" etype="IMM" immwidth="8"/>
+	<description>Carry-less multiplication of one quadword of
+		"b" by one quadword of "c", storing
+		the 128-bit result in "dst". The immediate "Imm8" is
+		used to determine which quadwords of "b"
+		and "c" should be used.</description>
+	<operation>
+DEFINE PCLMUL128(X,Y) {
+	FOR i := 0 to 63
+		TMP[i] := X[ 0 ] and Y[ i ]
+		FOR j := 1 to i
+			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
+		ENDFOR
+		DEST[ i ] := TMP[ i ]
+	ENDFOR
+	FOR i := 64 to 126
+		TMP[i] := 0
+		FOR j := i - 63 to 63
+			TMP[i] := TMP[i] xor (X[ j ] and Y[ i - j ])
+		ENDFOR
+		DEST[ i ] := TMP[ i ]
+	ENDFOR
+	DEST[127] := 0
+	RETURN DEST // 128b vector
+}
+FOR i := 0 to 1
+	IF Imm8[0] == 0
+		TEMP1 := b.m128[i].qword[0]
+	ELSE
+		TEMP1 := b.m128[i].qword[1]
+	FI
+	IF Imm8[4] == 0
+		TEMP2 := c.m128[i].qword[0]
+	ELSE
+		TEMP2 := c.m128[i].qword[1]
+	FI
+	dst.m128[i] := PCLMUL128(TEMP1, TEMP2)
+ENDFOR
+dst[MAX:256] := 0
+	</operation>
+	<instruction name="VPCLMULQDQ" form="ymm, ymm, ymm, imm8" xed="VPCLMULQDQ_YMMu128_YMMu64_YMMu64_IMM8_AVX512"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_tpause">
+	<type>Flag</type>
+	<CPUID>WAITPKG</CPUID>
+	<category>Miscellaneous</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned int" varname="ctrl" etype="UI32"/>
+	<parameter type="unsigned __int64" varname="counter" etype="UI64"/>
+	<description>Directs the processor to enter an implementation-dependent optimized state until the TSC reaches or exceeds the value specified in "counter". Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF).</description>
+	<instruction name="TPAUSE" form="r32" xed="TPAUSE_GPR32u32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_umwait">
+	<type>Flag</type>
+	<CPUID>WAITPKG</CPUID>
+	<category>Miscellaneous</category>
+	<return type="unsigned char" varname="dst" etype="UI8"/>
+	<parameter type="unsigned int" varname="ctrl" etype="UI32"/>
+	<parameter type="unsigned __int64" varname="counter" etype="UI64"/>
+	<description>Directs the processor to enter an implementation-dependent optimized state while monitoring a range of addresses. The instruction wakes up when the TSC reaches or exceeds the value specified in "counter" (if the monitoring hardware did not trigger beforehand). Bit 0 of "ctrl" selects between a lower power (cleared) or faster wakeup (set) optimized state. Returns the carry flag (CF).</description>
+	<instruction name="UMWAIT" form="r32" xed="UMWAIT_GPR32"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_umonitor">
+	<CPUID>WAITPKG</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void*" varname="a"/>
+	<description>Sets up a linear address range to be
+		monitored by hardware and activates the
+		monitor. The address range should be a writeback
+		memory caching type. The address is
+		contained in "a".</description>
+	<instruction name="UMONITOR" form="r16/r32/r64" xed="UMONITOR_GPRa"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_wbnoinvd">
+	<CPUID>WBNOINVD</CPUID>
+	<category>Miscellaneous</category>
+	<return type="void"/>
+	<parameter type="void"/>
+	<description>Write back and do not flush internal caches.
+		Initiate writing-back without flushing of external
+		caches.</description>
+	<instruction name="WBNOINVD" xed="WBNOINVD"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xgetbv">
+	<CPUID>XSAVE</CPUID>
+	<category>OS-Targeted</category>
+	<return type="unsigned __int64" varname="dst" etype="UI64"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<description>Copy up to 64 bits from the value of the extended control register (XCR) specified by "a" into "dst". Currently only XFEATURE_ENABLED_MASK XCR is supported.</description>
+	<operation>dst[63:0] := XCR[a]
+	</operation>
+	<instruction name="XGETBV" xed="XGETBV"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xrstor">
+	<CPUID>XSAVE</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="rs_mask" etype="UI64"/>
+	<description>Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>st_mask := mem_addr.HEADER.XSTATE_BV[62:0]
+FOR i := 0 to 62
+	IF (rs_mask[i] AND XCR0[i])
+		IF st_mask[i]
+			CASE (i) OF
+			0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU]
+			1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE]
+			DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i]
+			ESAC
+		ELSE
+			// ProcessorExtendedState := Processor Supplied Values
+			CASE (i) OF
+			1: MXCSR := mem_addr.FPUSSESave_Area[SSE]
+			ESAC
+		FI
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XRSTOR" form="m8" xed="XRSTOR_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xrstor64">
+	<CPUID>XSAVE</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="rs_mask" etype="UI64"/>
+	<description>Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>st_mask := mem_addr.HEADER.XSTATE_BV[62:0]
+FOR i := 0 to 62
+	IF (rs_mask[i] AND XCR0[i])
+		IF st_mask[i]
+			CASE (i) OF
+			0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU]
+			1: ProcessorState[SSE] := mem_addr.FPUSSESaveArea[SSE]
+			DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i]
+			ESAC
+		ELSE
+			// ProcessorExtendedState := Processor Supplied Values
+			CASE (i) OF
+			1: MXCSR := mem_addr.FPUSSESave_Area[SSE]
+			ESAC
+		FI
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XRSTOR64" form="m8" xed="XRSTOR64_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsave">
+	<CPUID>XSAVE</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVE" form="m8" xed="XSAVE_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsave64">
+	<CPUID>XSAVE</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVE64" form="m8" xed="XSAVE64_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsaveopt">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSAVEOPT</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE instruction.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE]
+		2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVEOPT" form="m8" xed="XSAVEOPT_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsaveopt64">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSAVEOPT</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr". State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary. The hardware may optimize the manner in which data is saved. The performance of this instruction will be equal to or better than using the XSAVE64 instruction.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESaveArea[SSE] := ProcessorState[SSE]
+		2: mem_addr.EXT_SAVE_Area2[YMM] := ProcessorState[YMM]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVEOPT64" form="m8" xed="XSAVEOPT64_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsetbv">
+	<CPUID>XSAVE</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="unsigned int" varname="a" etype="UI32"/>
+	<parameter type="unsigned __int64" varname="val" etype="UI64"/>
+	<description>Copy 64 bits from "val" to the extended control register (XCR) specified by "a". Currently only the XFEATURE_ENABLED_MASK XCR is supported.</description>
+	<operation>
+XCR[a] := val[63:0]
+	</operation>
+	<instruction name="XSETBV" xed="XSETBV"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsavec">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSAVEC</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESave_Area[SSE] := ProcessorState[SSE]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVEC" form="m8" xed="XSAVEC_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsaves">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSS</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESave_Area[SSE] := ProcessorState[SSE]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVES" form="m8" xed="XSAVES_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsavec64">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSAVEC</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsavec differs from xsave in that it uses compaction and that it may use init optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESave_Area[SSE] := ProcessorState[SSE]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVEC64" form="m8" xed="XSAVEC64_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xsaves64">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSS</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="save_mask" etype="UI64"/>
+	<description>Perform a full or partial save of the enabled processor states to memory at "mem_addr"; xsaves differs from xsave in that it can save state components corresponding to bits set in IA32_XSS MSR and that it may use the modified optimization. State is saved based on bits [62:0] in "save_mask" and "XCR0". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>mask[62:0] := save_mask[62:0] AND XCR0[62:0]
+FOR i := 0 to 62
+	IF mask[i]
+		CASE (i) OF
+		0: mem_addr.FPUSSESave_Area[FPU] := ProcessorState[x87_FPU]
+		1: mem_addr.FPUSSESave_Area[SSE] := ProcessorState[SSE]
+		DEFAULT: mem_addr.Ext_Save_Area[i] := ProcessorState[i]
+		ESAC
+		mem_addr.HEADER.XSTATE_BV[i] := INIT_FUNCTION[i]
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XSAVES64" form="m8" xed="XSAVES64_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xrstors">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSS</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="rs_mask" etype="UI64"/>
+	<description>Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>st_mask := mem_addr.HEADER.XSTATE_BV[62:0]
+FOR i := 0 to 62
+	IF (rs_mask[i] AND XCR0[i])
+		IF st_mask[i]
+			CASE (i) OF
+			0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU]
+			1: ProcessorState[SSE] := mem_addr.FPUSSESave_Area[SSE]
+			DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i]
+			ESAC
+		ELSE
+			// ProcessorExtendedState := Processor Supplied Values
+			CASE (i) OF
+			1: MXCSR := mem_addr.FPUSSESave_Area[SSE]
+			ESAC
+		FI
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XRSTORS" form="m8" xed="XRSTORS_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+<intrinsic tech="Other" name="_xrstors64">
+	<CPUID>XSAVE</CPUID>
+	<CPUID>XSS</CPUID>
+	<category>OS-Targeted</category>
+	<return type="void"/>
+	<parameter type="const void *" varname="mem_addr"/>
+	<parameter type="unsigned __int64" varname="rs_mask" etype="UI64"/>
+	<description>Perform a full or partial restore of the enabled processor states using the state information stored in memory at "mem_addr". xrstors differs from xrstor in that it can restore state components corresponding to bits set in the IA32_XSS MSR; xrstors cannot restore from an xsave area in which the extended region is in the standard form. State is restored based on bits [62:0] in "rs_mask", "XCR0", and "mem_addr.HEADER.XSTATE_BV". "mem_addr" must be aligned on a 64-byte boundary.</description>
+	<operation>st_mask := mem_addr.HEADER.XSTATE_BV[62:0]
+FOR i := 0 to 62
+	IF (rs_mask[i] AND XCR0[i])
+		IF st_mask[i]
+			CASE (i) OF
+			0: ProcessorState[x87_FPU] := mem_addr.FPUSSESave_Area[FPU]
+			1: ProcessorState[SSE] := mem_addr.FPUSSESave_Area[SSE]
+			DEFAULT: ProcessorState[i] := mem_addr.Ext_Save_Area[i]
+			ESAC
+		ELSE
+			// ProcessorExtendedState := Processor Supplied Values
+			CASE (i) OF
+			1: MXCSR := mem_addr.FPUSSESave_Area[SSE]
+			ESAC
+		FI
+	FI
+	i := i + 1
+ENDFOR
+	</operation>
+	<instruction name="XRSTORS64" form="m8" xed="XRSTORS64_MEMmxsave"/>
+	<header>immintrin.h</header>
+</intrinsic>
+</intrinsics_list>
+\ No newline at end of file